1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 3, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
32 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
48 #include "integrate.h"
51 #include "target-def.h"
53 #include "langhooks.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 void (*arm_lang_output_object_attributes_hook)(void);
64 /* Forward function declarations. */
65 static arm_stack_offsets *arm_get_frame_offsets (void);
66 static void arm_add_gc_roots (void);
67 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
68 HOST_WIDE_INT, rtx, rtx, int, int);
69 static unsigned bit_count (unsigned long);
70 static int arm_address_register_rtx_p (rtx, int);
71 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
72 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
73 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
74 inline static int thumb1_index_register_rtx_p (rtx, int);
75 static int thumb_far_jump_used_p (void);
76 static bool thumb_force_lr_save (void);
77 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
78 static rtx emit_sfm (int, int);
79 static unsigned arm_size_return_regs (void);
80 static bool arm_assemble_integer (rtx, unsigned int, int);
81 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
82 static arm_cc get_arm_condition_code (rtx);
83 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
84 static rtx is_jump_table (rtx);
85 static const char *output_multi_immediate (rtx *, const char *, const char *,
87 static const char *shift_op (rtx, HOST_WIDE_INT *);
88 static struct machine_function *arm_init_machine_status (void);
89 static void thumb_exit (FILE *, int);
90 static rtx is_jump_table (rtx);
91 static HOST_WIDE_INT get_jump_table_size (rtx);
92 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
93 static Mnode *add_minipool_forward_ref (Mfix *);
94 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
95 static Mnode *add_minipool_backward_ref (Mfix *);
96 static void assign_minipool_offsets (Mfix *);
97 static void arm_print_value (FILE *, rtx);
98 static void dump_minipool (rtx);
99 static int arm_barrier_cost (rtx);
100 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
101 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
102 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
104 static void arm_reorg (void);
105 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
106 static unsigned long arm_compute_save_reg0_reg12_mask (void);
107 static unsigned long arm_compute_save_reg_mask (void);
108 static unsigned long arm_isr_value (tree);
109 static unsigned long arm_compute_func_type (void);
110 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
111 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
112 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
113 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
115 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
116 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static int arm_comp_type_attributes (const_tree, const_tree);
119 static void arm_set_default_type_attributes (tree);
120 static int arm_adjust_cost (rtx, rtx, rtx, int);
121 static int count_insns_for_constant (HOST_WIDE_INT, int);
122 static int arm_get_strip_length (int);
123 static bool arm_function_ok_for_sibcall (tree, tree);
124 static void arm_internal_label (FILE *, const char *, unsigned long);
125 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
127 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
128 static bool arm_size_rtx_costs (rtx, int, int, int *);
129 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
131 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
132 static bool arm_9e_rtx_costs (rtx, int, int, int *);
133 static int arm_address_cost (rtx);
134 static bool arm_memory_load_p (rtx);
135 static bool arm_cirrus_insn_p (rtx);
136 static void cirrus_reorg (rtx);
137 static void arm_init_builtins (void);
138 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
139 static void arm_init_iwmmxt_builtins (void);
140 static rtx safe_vector_operand (rtx, enum machine_mode);
141 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
142 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
143 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
144 static void emit_constant_insn (rtx cond, rtx pattern);
145 static rtx emit_set_insn (rtx, rtx);
146 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
149 #ifdef OBJECT_FORMAT_ELF
150 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
151 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
154 static void arm_encode_section_info (tree, rtx, int);
157 static void arm_file_end (void);
158 static void arm_file_start (void);
160 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
162 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
163 enum machine_mode, const_tree, bool);
164 static bool arm_promote_prototypes (const_tree);
165 static bool arm_default_short_enums (void);
166 static bool arm_align_anon_bitfield (void);
167 static bool arm_return_in_msb (const_tree);
168 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
169 #ifdef TARGET_UNWIND_INFO
170 static void arm_unwind_emit (FILE *, rtx);
171 static bool arm_output_ttype (rtx);
173 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
175 static tree arm_cxx_guard_type (void);
176 static bool arm_cxx_guard_mask_bit (void);
177 static tree arm_get_cookie_size (tree);
178 static bool arm_cookie_has_size (void);
179 static bool arm_cxx_cdtor_returns_this (void);
180 static bool arm_cxx_key_method_may_be_inline (void);
181 static void arm_cxx_determine_class_data_visibility (tree);
182 static bool arm_cxx_class_data_always_comdat (void);
183 static bool arm_cxx_use_aeabi_atexit (void);
184 static void arm_init_libfuncs (void);
185 static bool arm_handle_option (size_t, const char *, int);
186 static void arm_target_help (void);
187 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
188 static bool arm_cannot_copy_insn_p (rtx);
189 static bool arm_tls_symbol_p (rtx x);
190 static int arm_issue_rate (void);
191 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
194 /* Initialize the GCC target structure. */
195 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
196 #undef TARGET_MERGE_DECL_ATTRIBUTES
197 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
200 #undef TARGET_ATTRIBUTE_TABLE
201 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
203 #undef TARGET_ASM_FILE_START
204 #define TARGET_ASM_FILE_START arm_file_start
205 #undef TARGET_ASM_FILE_END
206 #define TARGET_ASM_FILE_END arm_file_end
208 #undef TARGET_ASM_ALIGNED_SI_OP
209 #define TARGET_ASM_ALIGNED_SI_OP NULL
210 #undef TARGET_ASM_INTEGER
211 #define TARGET_ASM_INTEGER arm_assemble_integer
213 #undef TARGET_ASM_FUNCTION_PROLOGUE
214 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
216 #undef TARGET_ASM_FUNCTION_EPILOGUE
217 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
219 #undef TARGET_DEFAULT_TARGET_FLAGS
220 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
221 #undef TARGET_HANDLE_OPTION
222 #define TARGET_HANDLE_OPTION arm_handle_option
224 #define TARGET_HELP arm_target_help
226 #undef TARGET_COMP_TYPE_ATTRIBUTES
227 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
229 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
230 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
232 #undef TARGET_SCHED_ADJUST_COST
233 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
235 #undef TARGET_ENCODE_SECTION_INFO
237 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
239 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
242 #undef TARGET_STRIP_NAME_ENCODING
243 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
245 #undef TARGET_ASM_INTERNAL_LABEL
246 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
248 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
249 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
251 #undef TARGET_ASM_OUTPUT_MI_THUNK
252 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
253 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
254 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
256 /* This will be overridden in arm_override_options. */
257 #undef TARGET_RTX_COSTS
258 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
259 #undef TARGET_ADDRESS_COST
260 #define TARGET_ADDRESS_COST arm_address_cost
262 #undef TARGET_SHIFT_TRUNCATION_MASK
263 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
264 #undef TARGET_VECTOR_MODE_SUPPORTED_P
265 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
267 #undef TARGET_MACHINE_DEPENDENT_REORG
268 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
270 #undef TARGET_INIT_BUILTINS
271 #define TARGET_INIT_BUILTINS arm_init_builtins
272 #undef TARGET_EXPAND_BUILTIN
273 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
275 #undef TARGET_INIT_LIBFUNCS
276 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
278 #undef TARGET_PROMOTE_FUNCTION_ARGS
279 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
280 #undef TARGET_PROMOTE_FUNCTION_RETURN
281 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
282 #undef TARGET_PROMOTE_PROTOTYPES
283 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
284 #undef TARGET_PASS_BY_REFERENCE
285 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
286 #undef TARGET_ARG_PARTIAL_BYTES
287 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
289 #undef TARGET_SETUP_INCOMING_VARARGS
290 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
292 #undef TARGET_DEFAULT_SHORT_ENUMS
293 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
295 #undef TARGET_ALIGN_ANON_BITFIELD
296 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
298 #undef TARGET_NARROW_VOLATILE_BITFIELD
299 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
301 #undef TARGET_CXX_GUARD_TYPE
302 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
304 #undef TARGET_CXX_GUARD_MASK_BIT
305 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
307 #undef TARGET_CXX_GET_COOKIE_SIZE
308 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
310 #undef TARGET_CXX_COOKIE_HAS_SIZE
311 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
313 #undef TARGET_CXX_CDTOR_RETURNS_THIS
314 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
316 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
317 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
319 #undef TARGET_CXX_USE_AEABI_ATEXIT
320 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
322 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
323 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
324 arm_cxx_determine_class_data_visibility
326 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
327 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
329 #undef TARGET_RETURN_IN_MSB
330 #define TARGET_RETURN_IN_MSB arm_return_in_msb
332 #undef TARGET_MUST_PASS_IN_STACK
333 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
335 #ifdef TARGET_UNWIND_INFO
336 #undef TARGET_UNWIND_EMIT
337 #define TARGET_UNWIND_EMIT arm_unwind_emit
339 /* EABI unwinding tables use a different format for the typeinfo tables. */
340 #undef TARGET_ASM_TTYPE
341 #define TARGET_ASM_TTYPE arm_output_ttype
343 #undef TARGET_ARM_EABI_UNWINDER
344 #define TARGET_ARM_EABI_UNWINDER true
345 #endif /* TARGET_UNWIND_INFO */
347 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
348 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
350 #undef TARGET_CANNOT_COPY_INSN_P
351 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
354 #undef TARGET_HAVE_TLS
355 #define TARGET_HAVE_TLS true
358 #undef TARGET_CANNOT_FORCE_CONST_MEM
359 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
361 #undef TARGET_SCHED_ISSUE_RATE
362 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
364 #undef TARGET_MANGLE_TYPE
365 #define TARGET_MANGLE_TYPE arm_mangle_type
368 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
369 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
372 struct gcc_target targetm = TARGET_INITIALIZER;
374 /* Obstack for minipool constant handling. */
375 static struct obstack minipool_obstack;
376 static char * minipool_startobj;
378 /* The maximum number of insns skipped which
379 will be conditionalised if possible. */
380 static int max_insns_skipped = 5;
382 extern FILE * asm_out_file;
384 /* True if we are currently building a constant table. */
385 int making_const_table;
387 /* Define the information needed to generate branch insns. This is
388 stored from the compare operation. */
389 rtx arm_compare_op0, arm_compare_op1;
391 /* The processor for which instructions should be scheduled. */
392 enum processor_type arm_tune = arm_none;
394 /* The default processor used if not overridden by commandline. */
395 static enum processor_type arm_default_cpu = arm_none;
397 /* Which floating point model to use. */
398 enum arm_fp_model arm_fp_model;
400 /* Which floating point hardware is available. */
401 enum fputype arm_fpu_arch;
403 /* Which floating point hardware to schedule for. */
404 enum fputype arm_fpu_tune;
406 /* Whether to use floating point hardware. */
407 enum float_abi_type arm_float_abi;
409 /* Which ABI to use. */
410 enum arm_abi_type arm_abi;
412 /* Which thread pointer model to use. */
413 enum arm_tp_type target_thread_pointer = TP_AUTO;
415 /* Used to parse -mstructure_size_boundary command line option. */
416 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
418 /* Used for Thumb call_via trampolines. */
419 rtx thumb_call_via_label[14];
420 static int thumb_call_reg_needed;
422 /* Bit values used to identify processor capabilities. */
423 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
424 #define FL_ARCH3M (1 << 1) /* Extended multiply */
425 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
426 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
427 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
428 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
429 #define FL_THUMB (1 << 6) /* Thumb aware */
430 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
431 #define FL_STRONG (1 << 8) /* StrongARM */
432 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
433 #define FL_XSCALE (1 << 10) /* XScale */
434 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
435 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
436 media instructions. */
437 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
438 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
439 Note: ARM6 & 7 derivatives only. */
440 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
441 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
442 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
444 #define FL_DIV (1 << 18) /* Hardware divide. */
445 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
446 #define FL_NEON (1 << 20) /* Neon instructions. */
448 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
450 #define FL_FOR_ARCH2 FL_NOTM
451 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
452 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
453 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
454 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
455 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
456 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
457 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
458 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
459 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
460 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
461 #define FL_FOR_ARCH6J FL_FOR_ARCH6
462 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
463 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
464 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
465 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
466 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
467 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
468 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
469 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
470 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
472 /* The bits in this mask specify which
473 instructions we are allowed to generate. */
474 static unsigned long insn_flags = 0;
476 /* The bits in this mask specify which instruction scheduling options should
478 static unsigned long tune_flags = 0;
480 /* The following are used in the arm.md file as equivalents to bits
481 in the above two flag variables. */
483 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
486 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
489 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
492 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
495 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
498 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
501 /* Nonzero if this chip supports the ARM 6K extensions. */
504 /* Nonzero if instructions not present in the 'M' profile can be used. */
505 int arm_arch_notm = 0;
507 /* Nonzero if this chip can benefit from load scheduling. */
508 int arm_ld_sched = 0;
510 /* Nonzero if this chip is a StrongARM. */
511 int arm_tune_strongarm = 0;
513 /* Nonzero if this chip is a Cirrus variant. */
514 int arm_arch_cirrus = 0;
516 /* Nonzero if this chip supports Intel Wireless MMX technology. */
517 int arm_arch_iwmmxt = 0;
519 /* Nonzero if this chip is an XScale. */
520 int arm_arch_xscale = 0;
522 /* Nonzero if tuning for XScale */
523 int arm_tune_xscale = 0;
525 /* Nonzero if we want to tune for stores that access the write-buffer.
526 This typically means an ARM6 or ARM7 with MMU or MPU. */
527 int arm_tune_wbuf = 0;
529 /* Nonzero if generating Thumb instructions. */
532 /* Nonzero if we should define __THUMB_INTERWORK__ in the
534 XXX This is a bit of a hack, it's intended to help work around
535 problems in GLD which doesn't understand that armv5t code is
536 interworking clean. */
537 int arm_cpp_interwork = 0;
539 /* Nonzero if chip supports Thumb 2. */
542 /* Nonzero if chip supports integer division instruction. */
545 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
546 must report the mode of the memory reference from PRINT_OPERAND to
547 PRINT_OPERAND_ADDRESS. */
548 enum machine_mode output_memory_reference_mode;
550 /* The register number to be used for the PIC offset register. */
551 unsigned arm_pic_register = INVALID_REGNUM;
553 /* Set to 1 when a return insn is output, this means that the epilogue
555 int return_used_this_function;
557 /* Set to 1 after arm_reorg has started. Reset to start at the start of
558 the next function. */
559 static int after_arm_reorg = 0;
561 /* The maximum number of insns to be used when loading a constant. */
562 static int arm_constant_limit = 3;
564 /* For an explanation of these variables, see final_prescan_insn below. */
566 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
567 enum arm_cond_code arm_current_cc;
569 int arm_target_label;
570 /* The number of conditionally executed insns, including the current insn. */
571 int arm_condexec_count = 0;
572 /* A bitmask specifying the patterns for the IT block.
573 Zero means do not output an IT block before this insn. */
574 int arm_condexec_mask = 0;
575 /* The number of bits used in arm_condexec_mask. */
576 int arm_condexec_masklen = 0;
578 /* The condition codes of the ARM, and the inverse function. */
579 static const char * const arm_condition_codes[] =
581 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
582 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
585 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
586 #define streq(string1, string2) (strcmp (string1, string2) == 0)
588 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
589 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
590 | (1 << PIC_OFFSET_TABLE_REGNUM)))
592 /* Initialization code. */
596 const char *const name;
597 enum processor_type core;
599 const unsigned long flags;
600 bool (* rtx_costs) (rtx, int, int, int *);
603 /* Not all of these give usefully different compilation alternatives,
604 but there is no simple way of generalizing them. */
605 static const struct processors all_cores[] =
608 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
609 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
610 #include "arm-cores.def"
612 {NULL, arm_none, NULL, 0, NULL}
615 static const struct processors all_architectures[] =
617 /* ARM Architectures */
618 /* We don't specify rtx_costs here as it will be figured out
621 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
622 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
623 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
624 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
625 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
626 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
627 implementations that support it, so we will leave it out for now. */
628 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
629 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
630 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
631 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
632 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
633 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
634 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
635 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
636 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
637 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
638 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
639 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
640 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
641 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
642 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
643 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
644 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
645 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
646 {NULL, arm_none, NULL, 0 , NULL}
649 struct arm_cpu_select
653 const struct processors * processors;
656 /* This is a magic structure. The 'string' field is magically filled in
657 with a pointer to the value specified by the user on the command line
658 assuming that the user has specified such a value. */
660 static struct arm_cpu_select arm_select[] =
662 /* string name processors */
663 { NULL, "-mcpu=", all_cores },
664 { NULL, "-march=", all_architectures },
665 { NULL, "-mtune=", all_cores }
668 /* Defines representing the indexes into the above table. */
669 #define ARM_OPT_SET_CPU 0
670 #define ARM_OPT_SET_ARCH 1
671 #define ARM_OPT_SET_TUNE 2
673 /* The name of the preprocessor macro to define for this architecture. */
675 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
684 /* Available values for -mfpu=. */
686 static const struct fpu_desc all_fpus[] =
688 {"fpa", FPUTYPE_FPA},
689 {"fpe2", FPUTYPE_FPA_EMU2},
690 {"fpe3", FPUTYPE_FPA_EMU2},
691 {"maverick", FPUTYPE_MAVERICK},
692 {"vfp", FPUTYPE_VFP},
693 {"vfp3", FPUTYPE_VFP3},
694 {"neon", FPUTYPE_NEON}
698 /* Floating point models used by the different hardware.
699 See fputype in arm.h. */
701 static const enum fputype fp_model_for_fpu[] =
703 /* No FP hardware. */
704 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
705 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
706 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
707 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
708 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
709 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
710 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
711 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
718 enum float_abi_type abi_type;
722 /* Available values for -mfloat-abi=. */
724 static const struct float_abi all_float_abis[] =
726 {"soft", ARM_FLOAT_ABI_SOFT},
727 {"softfp", ARM_FLOAT_ABI_SOFTFP},
728 {"hard", ARM_FLOAT_ABI_HARD}
735 enum arm_abi_type abi_type;
739 /* Available values for -mabi=. */
741 static const struct abi_name arm_all_abis[] =
743 {"apcs-gnu", ARM_ABI_APCS},
744 {"atpcs", ARM_ABI_ATPCS},
745 {"aapcs", ARM_ABI_AAPCS},
746 {"iwmmxt", ARM_ABI_IWMMXT},
747 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
750 /* Supported TLS relocations. */
760 /* Emit an insn that's a simple single-set. Both the operands must be known
763 emit_set_insn (rtx x, rtx y)
765 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
768 /* Return the number of bits set in VALUE. */
770 bit_count (unsigned long value)
772 unsigned long count = 0;
777 value &= value - 1; /* Clear the least-significant set bit. */
783 /* Set up library functions unique to ARM. */
786 arm_init_libfuncs (void)
788 /* There are no special library functions unless we are using the
793 /* The functions below are described in Section 4 of the "Run-Time
794 ABI for the ARM architecture", Version 1.0. */
796 /* Double-precision floating-point arithmetic. Table 2. */
797 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
798 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
799 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
800 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
801 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
803 /* Double-precision comparisons. Table 3. */
804 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
805 set_optab_libfunc (ne_optab, DFmode, NULL);
806 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
807 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
808 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
809 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
810 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
812 /* Single-precision floating-point arithmetic. Table 4. */
813 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
814 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
815 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
816 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
817 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
819 /* Single-precision comparisons. Table 5. */
820 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
821 set_optab_libfunc (ne_optab, SFmode, NULL);
822 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
823 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
824 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
825 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
826 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
828 /* Floating-point to integer conversions. Table 6. */
829 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
830 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
831 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
832 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
833 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
834 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
835 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
836 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
838 /* Conversions between floating types. Table 7. */
839 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
840 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
842 /* Integer to floating-point conversions. Table 8. */
843 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
844 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
845 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
846 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
847 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
848 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
849 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
850 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
852 /* Long long. Table 9. */
853 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
854 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
855 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
856 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
857 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
858 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
859 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
860 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
862 /* Integer (32/32->32) division. \S 4.3.1. */
863 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
864 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
866 /* The divmod functions are designed so that they can be used for
867 plain division, even though they return both the quotient and the
868 remainder. The quotient is returned in the usual location (i.e.,
869 r0 for SImode, {r0, r1} for DImode), just as would be expected
870 for an ordinary division routine. Because the AAPCS calling
871 conventions specify that all of { r0, r1, r2, r3 } are
872 callee-saved registers, there is no need to tell the compiler
873 explicitly that those registers are clobbered by these
875 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
876 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
878 /* For SImode division the ABI provides div-without-mod routines,
880 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
881 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
883 /* We don't have mod libcalls. Fortunately gcc knows how to use the
884 divmod libcalls instead. */
885 set_optab_libfunc (smod_optab, DImode, NULL);
886 set_optab_libfunc (umod_optab, DImode, NULL);
887 set_optab_libfunc (smod_optab, SImode, NULL);
888 set_optab_libfunc (umod_optab, SImode, NULL);
891 /* Implement TARGET_HANDLE_OPTION. */
894 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
899 arm_select[1].string = arg;
903 arm_select[0].string = arg;
906 case OPT_mhard_float:
907 target_float_abi_name = "hard";
910 case OPT_msoft_float:
911 target_float_abi_name = "soft";
915 arm_select[2].string = arg;
924 arm_target_help (void)
927 static int columns = 0;
930 /* If we have not done so already, obtain the desired maximum width of
931 the output. Note - this is a duplication of the code at the start of
932 gcc/opts.c:print_specific_help() - the two copies should probably be
933 replaced by a single function. */
938 GET_ENVIRONMENT (p, "COLUMNS");
941 int value = atoi (p);
948 /* Use a reasonable default. */
952 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
954 /* The - 2 is because we know that the last entry in the array is NULL. */
955 i = ARRAY_SIZE (all_cores) - 2;
957 printf (" %s", all_cores[i].name);
958 remaining = columns - (strlen (all_cores[i].name) + 4);
959 gcc_assert (remaining >= 0);
963 int len = strlen (all_cores[i].name);
965 if (remaining > len + 2)
967 printf (", %s", all_cores[i].name);
968 remaining -= len + 2;
974 printf ("\n %s", all_cores[i].name);
975 remaining = columns - (len + 4);
979 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
981 i = ARRAY_SIZE (all_architectures) - 2;
984 printf (" %s", all_architectures[i].name);
985 remaining = columns - (strlen (all_architectures[i].name) + 4);
986 gcc_assert (remaining >= 0);
990 int len = strlen (all_architectures[i].name);
992 if (remaining > len + 2)
994 printf (", %s", all_architectures[i].name);
995 remaining -= len + 2;
1001 printf ("\n %s", all_architectures[i].name);
1002 remaining = columns - (len + 4);
1009 /* Fix up any incompatible options that the user has specified.
1010 This has now turned into a maze. */
1012 arm_override_options (void)
1015 enum processor_type target_arch_cpu = arm_none;
1017 /* Set up the flags based on the cpu/architecture selected by the user. */
1018 for (i = ARRAY_SIZE (arm_select); i--;)
1020 struct arm_cpu_select * ptr = arm_select + i;
1022 if (ptr->string != NULL && ptr->string[0] != '\0')
1024 const struct processors * sel;
1026 for (sel = ptr->processors; sel->name != NULL; sel++)
1027 if (streq (ptr->string, sel->name))
1029 /* Set the architecture define. */
1030 if (i != ARM_OPT_SET_TUNE)
1031 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1033 /* Determine the processor core for which we should
1034 tune code-generation. */
1035 if (/* -mcpu= is a sensible default. */
1036 i == ARM_OPT_SET_CPU
1037 /* -mtune= overrides -mcpu= and -march=. */
1038 || i == ARM_OPT_SET_TUNE)
1039 arm_tune = (enum processor_type) (sel - ptr->processors);
1041 /* Remember the CPU associated with this architecture.
1042 If no other option is used to set the CPU type,
1043 we'll use this to guess the most suitable tuning
1045 if (i == ARM_OPT_SET_ARCH)
1046 target_arch_cpu = sel->core;
1048 if (i != ARM_OPT_SET_TUNE)
1050 /* If we have been given an architecture and a processor
1051 make sure that they are compatible. We only generate
1052 a warning though, and we prefer the CPU over the
1054 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1055 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1058 insn_flags = sel->flags;
1064 if (sel->name == NULL)
1065 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1069 /* Guess the tuning options from the architecture if necessary. */
1070 if (arm_tune == arm_none)
1071 arm_tune = target_arch_cpu;
1073 /* If the user did not specify a processor, choose one for them. */
1074 if (insn_flags == 0)
1076 const struct processors * sel;
1077 unsigned int sought;
1078 enum processor_type cpu;
1080 cpu = TARGET_CPU_DEFAULT;
1081 if (cpu == arm_none)
1083 #ifdef SUBTARGET_CPU_DEFAULT
1084 /* Use the subtarget default CPU if none was specified by
1086 cpu = SUBTARGET_CPU_DEFAULT;
1088 /* Default to ARM6. */
1089 if (cpu == arm_none)
1092 sel = &all_cores[cpu];
1094 insn_flags = sel->flags;
1096 /* Now check to see if the user has specified some command line
1097 switch that require certain abilities from the cpu. */
1100 if (TARGET_INTERWORK || TARGET_THUMB)
1102 sought |= (FL_THUMB | FL_MODE32);
1104 /* There are no ARM processors that support both APCS-26 and
1105 interworking. Therefore we force FL_MODE26 to be removed
1106 from insn_flags here (if it was set), so that the search
1107 below will always be able to find a compatible processor. */
1108 insn_flags &= ~FL_MODE26;
1111 if (sought != 0 && ((sought & insn_flags) != sought))
1113 /* Try to locate a CPU type that supports all of the abilities
1114 of the default CPU, plus the extra abilities requested by
1116 for (sel = all_cores; sel->name != NULL; sel++)
1117 if ((sel->flags & sought) == (sought | insn_flags))
1120 if (sel->name == NULL)
1122 unsigned current_bit_count = 0;
1123 const struct processors * best_fit = NULL;
1125 /* Ideally we would like to issue an error message here
1126 saying that it was not possible to find a CPU compatible
1127 with the default CPU, but which also supports the command
1128 line options specified by the programmer, and so they
1129 ought to use the -mcpu=<name> command line option to
1130 override the default CPU type.
1132 If we cannot find a cpu that has both the
1133 characteristics of the default cpu and the given
1134 command line options we scan the array again looking
1135 for a best match. */
1136 for (sel = all_cores; sel->name != NULL; sel++)
1137 if ((sel->flags & sought) == sought)
1141 count = bit_count (sel->flags & insn_flags);
1143 if (count >= current_bit_count)
1146 current_bit_count = count;
1150 gcc_assert (best_fit);
1154 insn_flags = sel->flags;
1156 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1157 arm_default_cpu = (enum processor_type) (sel - all_cores);
1158 if (arm_tune == arm_none)
1159 arm_tune = arm_default_cpu;
1162 /* The processor for which we should tune should now have been
1164 gcc_assert (arm_tune != arm_none);
1166 tune_flags = all_cores[(int)arm_tune].flags;
1168 targetm.rtx_costs = arm_size_rtx_costs;
1170 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1172 /* Make sure that the processor choice does not conflict with any of the
1173 other command line choices. */
1174 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1175 error ("target CPU does not support ARM mode");
1177 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1179 warning (0, "target CPU does not support interworking" );
1180 target_flags &= ~MASK_INTERWORK;
1183 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1185 warning (0, "target CPU does not support THUMB instructions");
1186 target_flags &= ~MASK_THUMB;
1189 if (TARGET_APCS_FRAME && TARGET_THUMB)
1191 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1192 target_flags &= ~MASK_APCS_FRAME;
1195 /* Callee super interworking implies thumb interworking. Adding
1196 this to the flags here simplifies the logic elsewhere. */
1197 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1198 target_flags |= MASK_INTERWORK;
1200 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1201 from here where no function is being compiled currently. */
1202 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1203 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1205 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1206 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1208 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1209 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1211 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1213 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1214 target_flags |= MASK_APCS_FRAME;
1217 if (TARGET_POKE_FUNCTION_NAME)
1218 target_flags |= MASK_APCS_FRAME;
1220 if (TARGET_APCS_REENT && flag_pic)
1221 error ("-fpic and -mapcs-reent are incompatible");
1223 if (TARGET_APCS_REENT)
1224 warning (0, "APCS reentrant code not supported. Ignored");
1226 /* If this target is normally configured to use APCS frames, warn if they
1227 are turned off and debugging is turned on. */
1229 && write_symbols != NO_DEBUG
1230 && !TARGET_APCS_FRAME
1231 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1232 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1234 if (TARGET_APCS_FLOAT)
1235 warning (0, "passing floating point arguments in fp regs not yet supported");
1237 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1238 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1239 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1240 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1241 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1242 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1243 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1244 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1245 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1246 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1247 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1248 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1250 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1251 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1252 thumb_code = (TARGET_ARM == 0);
1253 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1254 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1255 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1256 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1258 /* V5 code we generate is completely interworking capable, so we turn off
1259 TARGET_INTERWORK here to avoid many tests later on. */
1261 /* XXX However, we must pass the right pre-processor defines to CPP
1262 or GLD can get confused. This is a hack. */
1263 if (TARGET_INTERWORK)
1264 arm_cpp_interwork = 1;
1267 target_flags &= ~MASK_INTERWORK;
1269 if (target_abi_name)
1271 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1273 if (streq (arm_all_abis[i].name, target_abi_name))
1275 arm_abi = arm_all_abis[i].abi_type;
1279 if (i == ARRAY_SIZE (arm_all_abis))
1280 error ("invalid ABI option: -mabi=%s", target_abi_name);
1283 arm_abi = ARM_DEFAULT_ABI;
1285 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1286 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1288 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1289 error ("iwmmxt abi requires an iwmmxt capable cpu");
1291 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1292 if (target_fpu_name == NULL && target_fpe_name != NULL)
1294 if (streq (target_fpe_name, "2"))
1295 target_fpu_name = "fpe2";
1296 else if (streq (target_fpe_name, "3"))
1297 target_fpu_name = "fpe3";
1299 error ("invalid floating point emulation option: -mfpe=%s",
1302 if (target_fpu_name != NULL)
1304 /* The user specified a FPU. */
1305 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1307 if (streq (all_fpus[i].name, target_fpu_name))
1309 arm_fpu_arch = all_fpus[i].fpu;
1310 arm_fpu_tune = arm_fpu_arch;
1311 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1315 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1316 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1320 #ifdef FPUTYPE_DEFAULT
1321 /* Use the default if it is specified for this platform. */
1322 arm_fpu_arch = FPUTYPE_DEFAULT;
1323 arm_fpu_tune = FPUTYPE_DEFAULT;
1325 /* Pick one based on CPU type. */
1326 /* ??? Some targets assume FPA is the default.
1327 if ((insn_flags & FL_VFP) != 0)
1328 arm_fpu_arch = FPUTYPE_VFP;
1331 if (arm_arch_cirrus)
1332 arm_fpu_arch = FPUTYPE_MAVERICK;
1334 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1336 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1337 arm_fpu_tune = FPUTYPE_FPA;
1339 arm_fpu_tune = arm_fpu_arch;
1340 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1341 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1344 if (target_float_abi_name != NULL)
1346 /* The user specified a FP ABI. */
1347 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1349 if (streq (all_float_abis[i].name, target_float_abi_name))
1351 arm_float_abi = all_float_abis[i].abi_type;
1355 if (i == ARRAY_SIZE (all_float_abis))
1356 error ("invalid floating point abi: -mfloat-abi=%s",
1357 target_float_abi_name);
1360 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1362 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1363 sorry ("-mfloat-abi=hard and VFP");
1365 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1366 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1367 will ever exist. GCC makes no attempt to support this combination. */
1368 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1369 sorry ("iWMMXt and hardware floating point");
1371 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1372 if (TARGET_THUMB2 && TARGET_IWMMXT)
1373 sorry ("Thumb-2 iWMMXt");
1375 /* If soft-float is specified then don't use FPU. */
1376 if (TARGET_SOFT_FLOAT)
1377 arm_fpu_arch = FPUTYPE_NONE;
1379 /* For arm2/3 there is no need to do any scheduling if there is only
1380 a floating point emulator, or we are doing software floating-point. */
1381 if ((TARGET_SOFT_FLOAT
1382 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1383 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1384 && (tune_flags & FL_MODE32) == 0)
1385 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1387 if (target_thread_switch)
1389 if (strcmp (target_thread_switch, "soft") == 0)
1390 target_thread_pointer = TP_SOFT;
1391 else if (strcmp (target_thread_switch, "auto") == 0)
1392 target_thread_pointer = TP_AUTO;
1393 else if (strcmp (target_thread_switch, "cp15") == 0)
1394 target_thread_pointer = TP_CP15;
1396 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1399 /* Use the cp15 method if it is available. */
1400 if (target_thread_pointer == TP_AUTO)
1402 if (arm_arch6k && !TARGET_THUMB)
1403 target_thread_pointer = TP_CP15;
1405 target_thread_pointer = TP_SOFT;
1408 if (TARGET_HARD_TP && TARGET_THUMB1)
1409 error ("can not use -mtp=cp15 with 16-bit Thumb");
1411 /* Override the default structure alignment for AAPCS ABI. */
1412 if (TARGET_AAPCS_BASED)
1413 arm_structure_size_boundary = 8;
1415 if (structure_size_string != NULL)
1417 int size = strtol (structure_size_string, NULL, 0);
1419 if (size == 8 || size == 32
1420 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1421 arm_structure_size_boundary = size;
1423 warning (0, "structure size boundary can only be set to %s",
1424 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1427 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1429 error ("RTP PIC is incompatible with Thumb");
1433 /* If stack checking is disabled, we can use r10 as the PIC register,
1434 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1435 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1437 if (TARGET_VXWORKS_RTP)
1438 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1439 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1442 if (flag_pic && TARGET_VXWORKS_RTP)
1443 arm_pic_register = 9;
1445 if (arm_pic_register_string != NULL)
1447 int pic_register = decode_reg_name (arm_pic_register_string);
1450 warning (0, "-mpic-register= is useless without -fpic");
1452 /* Prevent the user from choosing an obviously stupid PIC register. */
1453 else if (pic_register < 0 || call_used_regs[pic_register]
1454 || pic_register == HARD_FRAME_POINTER_REGNUM
1455 || pic_register == STACK_POINTER_REGNUM
1456 || pic_register >= PC_REGNUM
1457 || (TARGET_VXWORKS_RTP
1458 && (unsigned int) pic_register != arm_pic_register))
1459 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1461 arm_pic_register = pic_register;
1464 /* ??? We might want scheduling for thumb2. */
1465 if (TARGET_THUMB && flag_schedule_insns)
1467 /* Don't warn since it's on by default in -O2. */
1468 flag_schedule_insns = 0;
1473 arm_constant_limit = 1;
1475 /* If optimizing for size, bump the number of instructions that we
1476 are prepared to conditionally execute (even on a StrongARM). */
1477 max_insns_skipped = 6;
1481 /* For processors with load scheduling, it never costs more than
1482 2 cycles to load a constant, and the load scheduler may well
1483 reduce that to 1. */
1485 arm_constant_limit = 1;
1487 /* On XScale the longer latency of a load makes it more difficult
1488 to achieve a good schedule, so it's faster to synthesize
1489 constants that can be done in two insns. */
1490 if (arm_tune_xscale)
1491 arm_constant_limit = 2;
1493 /* StrongARM has early execution of branches, so a sequence
1494 that is worth skipping is shorter. */
1495 if (arm_tune_strongarm)
1496 max_insns_skipped = 3;
1499 /* Register global variables with the garbage collector. */
1500 arm_add_gc_roots ();
1504 arm_add_gc_roots (void)
1506 gcc_obstack_init(&minipool_obstack);
1507 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1510 /* A table of known ARM exception types.
1511 For use with the interrupt function attribute. */
1515 const char *const arg;
1516 const unsigned long return_value;
1520 static const isr_attribute_arg isr_attribute_args [] =
1522 { "IRQ", ARM_FT_ISR },
1523 { "irq", ARM_FT_ISR },
1524 { "FIQ", ARM_FT_FIQ },
1525 { "fiq", ARM_FT_FIQ },
1526 { "ABORT", ARM_FT_ISR },
1527 { "abort", ARM_FT_ISR },
1528 { "ABORT", ARM_FT_ISR },
1529 { "abort", ARM_FT_ISR },
1530 { "UNDEF", ARM_FT_EXCEPTION },
1531 { "undef", ARM_FT_EXCEPTION },
1532 { "SWI", ARM_FT_EXCEPTION },
1533 { "swi", ARM_FT_EXCEPTION },
1534 { NULL, ARM_FT_NORMAL }
1537 /* Returns the (interrupt) function type of the current
1538 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1540 static unsigned long
1541 arm_isr_value (tree argument)
1543 const isr_attribute_arg * ptr;
1547 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1549 /* No argument - default to IRQ. */
1550 if (argument == NULL_TREE)
1553 /* Get the value of the argument. */
1554 if (TREE_VALUE (argument) == NULL_TREE
1555 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1556 return ARM_FT_UNKNOWN;
1558 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1560 /* Check it against the list of known arguments. */
1561 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1562 if (streq (arg, ptr->arg))
1563 return ptr->return_value;
1565 /* An unrecognized interrupt type. */
1566 return ARM_FT_UNKNOWN;
1569 /* Computes the type of the current function. */
1571 static unsigned long
1572 arm_compute_func_type (void)
1574 unsigned long type = ARM_FT_UNKNOWN;
1578 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1580 /* Decide if the current function is volatile. Such functions
1581 never return, and many memory cycles can be saved by not storing
1582 register values that will never be needed again. This optimization
1583 was added to speed up context switching in a kernel application. */
1585 && (TREE_NOTHROW (current_function_decl)
1586 || !(flag_unwind_tables
1587 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1588 && TREE_THIS_VOLATILE (current_function_decl))
1589 type |= ARM_FT_VOLATILE;
1591 if (cfun->static_chain_decl != NULL)
1592 type |= ARM_FT_NESTED;
1594 attr = DECL_ATTRIBUTES (current_function_decl);
1596 a = lookup_attribute ("naked", attr);
1598 type |= ARM_FT_NAKED;
1600 a = lookup_attribute ("isr", attr);
1602 a = lookup_attribute ("interrupt", attr);
1605 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1607 type |= arm_isr_value (TREE_VALUE (a));
1612 /* Returns the type of the current function. */
1615 arm_current_func_type (void)
1617 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1618 cfun->machine->func_type = arm_compute_func_type ();
1620 return cfun->machine->func_type;
1623 /* Return 1 if it is possible to return using a single instruction.
1624 If SIBLING is non-null, this is a test for a return before a sibling
1625 call. SIBLING is the call insn, so we can examine its register usage. */
1628 use_return_insn (int iscond, rtx sibling)
1631 unsigned int func_type;
1632 unsigned long saved_int_regs;
1633 unsigned HOST_WIDE_INT stack_adjust;
1634 arm_stack_offsets *offsets;
1636 /* Never use a return instruction before reload has run. */
1637 if (!reload_completed)
1640 func_type = arm_current_func_type ();
1642 /* Naked, volatile and stack alignment functions need special
1644 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1647 /* So do interrupt functions that use the frame pointer and Thumb
1648 interrupt functions. */
1649 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1652 offsets = arm_get_frame_offsets ();
1653 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1655 /* As do variadic functions. */
1656 if (current_function_pretend_args_size
1657 || cfun->machine->uses_anonymous_args
1658 /* Or if the function calls __builtin_eh_return () */
1659 || current_function_calls_eh_return
1660 /* Or if the function calls alloca */
1661 || current_function_calls_alloca
1662 /* Or if there is a stack adjustment. However, if the stack pointer
1663 is saved on the stack, we can use a pre-incrementing stack load. */
1664 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1665 && stack_adjust == 4)))
1668 saved_int_regs = offsets->saved_regs_mask;
1670 /* Unfortunately, the insn
1672 ldmib sp, {..., sp, ...}
1674 triggers a bug on most SA-110 based devices, such that the stack
1675 pointer won't be correctly restored if the instruction takes a
1676 page fault. We work around this problem by popping r3 along with
1677 the other registers, since that is never slower than executing
1678 another instruction.
1680 We test for !arm_arch5 here, because code for any architecture
1681 less than this could potentially be run on one of the buggy
1683 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1685 /* Validate that r3 is a call-clobbered register (always true in
1686 the default abi) ... */
1687 if (!call_used_regs[3])
1690 /* ... that it isn't being used for a return value ... */
1691 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1694 /* ... or for a tail-call argument ... */
1697 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1699 if (find_regno_fusage (sibling, USE, 3))
1703 /* ... and that there are no call-saved registers in r0-r2
1704 (always true in the default ABI). */
1705 if (saved_int_regs & 0x7)
1709 /* Can't be done if interworking with Thumb, and any registers have been
1711 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1714 /* On StrongARM, conditional returns are expensive if they aren't
1715 taken and multiple registers have been stacked. */
1716 if (iscond && arm_tune_strongarm)
1718 /* Conditional return when just the LR is stored is a simple
1719 conditional-load instruction, that's not expensive. */
1720 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1724 && arm_pic_register != INVALID_REGNUM
1725 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1729 /* If there are saved registers but the LR isn't saved, then we need
1730 two instructions for the return. */
1731 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1734 /* Can't be done if any of the FPA regs are pushed,
1735 since this also requires an insn. */
1736 if (TARGET_HARD_FLOAT && TARGET_FPA)
1737 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1741 /* Likewise VFP regs. */
1742 if (TARGET_HARD_FLOAT && TARGET_VFP)
1743 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1744 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1747 if (TARGET_REALLY_IWMMXT)
1748 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1749 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1755 /* Return TRUE if int I is a valid immediate ARM constant. */
1758 const_ok_for_arm (HOST_WIDE_INT i)
1762 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1763 be all zero, or all one. */
1764 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1765 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1766 != ((~(unsigned HOST_WIDE_INT) 0)
1767 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1770 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1772 /* Fast return for 0 and small values. We must do this for zero, since
1773 the code below can't handle that one case. */
1774 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1777 /* Get the number of trailing zeros. */
1778 lowbit = ffs((int) i) - 1;
1780 /* Only even shifts are allowed in ARM mode so round down to the
1781 nearest even number. */
1785 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1790 /* Allow rotated constants in ARM mode. */
1792 && ((i & ~0xc000003f) == 0
1793 || (i & ~0xf000000f) == 0
1794 || (i & ~0xfc000003) == 0))
1801 /* Allow repeated pattern. */
1804 if (i == v || i == (v | (v << 8)))
1811 /* Return true if I is a valid constant for the operation CODE. */
1813 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1815 if (const_ok_for_arm (i))
1821 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1823 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1829 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1836 /* Emit a sequence of insns to handle a large constant.
1837 CODE is the code of the operation required, it can be any of SET, PLUS,
1838 IOR, AND, XOR, MINUS;
1839 MODE is the mode in which the operation is being performed;
1840 VAL is the integer to operate on;
1841 SOURCE is the other operand (a register, or a null-pointer for SET);
1842 SUBTARGETS means it is safe to create scratch registers if that will
1843 either produce a simpler sequence, or we will want to cse the values.
1844 Return value is the number of insns emitted. */
1846 /* ??? Tweak this for thumb2. */
1848 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1849 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1853 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1854 cond = COND_EXEC_TEST (PATTERN (insn));
1858 if (subtargets || code == SET
1859 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1860 && REGNO (target) != REGNO (source)))
1862 /* After arm_reorg has been called, we can't fix up expensive
1863 constants by pushing them into memory so we must synthesize
1864 them in-line, regardless of the cost. This is only likely to
1865 be more costly on chips that have load delay slots and we are
1866 compiling without running the scheduler (so no splitting
1867 occurred before the final instruction emission).
1869 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1871 if (!after_arm_reorg
1873 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1875 > arm_constant_limit + (code != SET)))
1879 /* Currently SET is the only monadic value for CODE, all
1880 the rest are diadic. */
1881 emit_set_insn (target, GEN_INT (val));
1886 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1888 emit_set_insn (temp, GEN_INT (val));
1889 /* For MINUS, the value is subtracted from, since we never
1890 have subtraction of a constant. */
1892 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1894 emit_set_insn (target,
1895 gen_rtx_fmt_ee (code, mode, source, temp));
1901 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1905 /* Return the number of ARM instructions required to synthesize the given
1908 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1910 HOST_WIDE_INT temp1;
1918 if (remainder & (3 << (i - 2)))
1923 temp1 = remainder & ((0x0ff << end)
1924 | ((i < end) ? (0xff >> (32 - end)) : 0));
1925 remainder &= ~temp1;
1930 } while (remainder);
1934 /* Emit an instruction with the indicated PATTERN. If COND is
1935 non-NULL, conditionalize the execution of the instruction on COND
1939 emit_constant_insn (rtx cond, rtx pattern)
1942 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1943 emit_insn (pattern);
1946 /* As above, but extra parameter GENERATE which, if clear, suppresses
1948 /* ??? This needs more work for thumb2. */
1951 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1952 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1957 int can_negate_initial = 0;
1960 int num_bits_set = 0;
1961 int set_sign_bit_copies = 0;
1962 int clear_sign_bit_copies = 0;
1963 int clear_zero_bit_copies = 0;
1964 int set_zero_bit_copies = 0;
1966 unsigned HOST_WIDE_INT temp1, temp2;
1967 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1969 /* Find out which operations are safe for a given CODE. Also do a quick
1970 check for degenerate cases; these can occur when DImode operations
1982 can_negate_initial = 1;
1986 if (remainder == 0xffffffff)
1989 emit_constant_insn (cond,
1990 gen_rtx_SET (VOIDmode, target,
1991 GEN_INT (ARM_SIGN_EXTEND (val))));
1996 if (reload_completed && rtx_equal_p (target, source))
1999 emit_constant_insn (cond,
2000 gen_rtx_SET (VOIDmode, target, source));
2009 emit_constant_insn (cond,
2010 gen_rtx_SET (VOIDmode, target, const0_rtx));
2013 if (remainder == 0xffffffff)
2015 if (reload_completed && rtx_equal_p (target, source))
2018 emit_constant_insn (cond,
2019 gen_rtx_SET (VOIDmode, target, source));
2028 if (reload_completed && rtx_equal_p (target, source))
2031 emit_constant_insn (cond,
2032 gen_rtx_SET (VOIDmode, target, source));
2036 /* We don't know how to handle other cases yet. */
2037 gcc_assert (remainder == 0xffffffff);
2040 emit_constant_insn (cond,
2041 gen_rtx_SET (VOIDmode, target,
2042 gen_rtx_NOT (mode, source)));
2046 /* We treat MINUS as (val - source), since (source - val) is always
2047 passed as (source + (-val)). */
2051 emit_constant_insn (cond,
2052 gen_rtx_SET (VOIDmode, target,
2053 gen_rtx_NEG (mode, source)));
2056 if (const_ok_for_arm (val))
2059 emit_constant_insn (cond,
2060 gen_rtx_SET (VOIDmode, target,
2061 gen_rtx_MINUS (mode, GEN_INT (val),
2073 /* If we can do it in one insn get out quickly. */
2074 if (const_ok_for_arm (val)
2075 || (can_negate_initial && const_ok_for_arm (-val))
2076 || (can_invert && const_ok_for_arm (~val)))
2079 emit_constant_insn (cond,
2080 gen_rtx_SET (VOIDmode, target,
2082 ? gen_rtx_fmt_ee (code, mode, source,
2088 /* Calculate a few attributes that may be useful for specific
2090 for (i = 31; i >= 0; i--)
2092 if ((remainder & (1 << i)) == 0)
2093 clear_sign_bit_copies++;
2098 for (i = 31; i >= 0; i--)
2100 if ((remainder & (1 << i)) != 0)
2101 set_sign_bit_copies++;
2106 for (i = 0; i <= 31; i++)
2108 if ((remainder & (1 << i)) == 0)
2109 clear_zero_bit_copies++;
2114 for (i = 0; i <= 31; i++)
2116 if ((remainder & (1 << i)) != 0)
2117 set_zero_bit_copies++;
2125 /* See if we can use movw. */
2126 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2129 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2134 /* See if we can do this by sign_extending a constant that is known
2135 to be negative. This is a good, way of doing it, since the shift
2136 may well merge into a subsequent insn. */
2137 if (set_sign_bit_copies > 1)
2139 if (const_ok_for_arm
2140 (temp1 = ARM_SIGN_EXTEND (remainder
2141 << (set_sign_bit_copies - 1))))
2145 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2146 emit_constant_insn (cond,
2147 gen_rtx_SET (VOIDmode, new_src,
2149 emit_constant_insn (cond,
2150 gen_ashrsi3 (target, new_src,
2151 GEN_INT (set_sign_bit_copies - 1)));
2155 /* For an inverted constant, we will need to set the low bits,
2156 these will be shifted out of harm's way. */
2157 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2158 if (const_ok_for_arm (~temp1))
2162 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2163 emit_constant_insn (cond,
2164 gen_rtx_SET (VOIDmode, new_src,
2166 emit_constant_insn (cond,
2167 gen_ashrsi3 (target, new_src,
2168 GEN_INT (set_sign_bit_copies - 1)));
2174 /* See if we can calculate the value as the difference between two
2175 valid immediates. */
2176 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2178 int topshift = clear_sign_bit_copies & ~1;
2180 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2181 & (0xff000000 >> topshift));
2183 /* If temp1 is zero, then that means the 9 most significant
2184 bits of remainder were 1 and we've caused it to overflow.
2185 When topshift is 0 we don't need to do anything since we
2186 can borrow from 'bit 32'. */
2187 if (temp1 == 0 && topshift != 0)
2188 temp1 = 0x80000000 >> (topshift - 1);
2190 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2192 if (const_ok_for_arm (temp2))
2196 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2197 emit_constant_insn (cond,
2198 gen_rtx_SET (VOIDmode, new_src,
2200 emit_constant_insn (cond,
2201 gen_addsi3 (target, new_src,
2209 /* See if we can generate this by setting the bottom (or the top)
2210 16 bits, and then shifting these into the other half of the
2211 word. We only look for the simplest cases, to do more would cost
2212 too much. Be careful, however, not to generate this when the
2213 alternative would take fewer insns. */
2214 if (val & 0xffff0000)
2216 temp1 = remainder & 0xffff0000;
2217 temp2 = remainder & 0x0000ffff;
2219 /* Overlaps outside this range are best done using other methods. */
2220 for (i = 9; i < 24; i++)
2222 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2223 && !const_ok_for_arm (temp2))
2225 rtx new_src = (subtargets
2226 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2228 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2229 source, subtargets, generate);
2237 gen_rtx_ASHIFT (mode, source,
2244 /* Don't duplicate cases already considered. */
2245 for (i = 17; i < 24; i++)
2247 if (((temp1 | (temp1 >> i)) == remainder)
2248 && !const_ok_for_arm (temp1))
2250 rtx new_src = (subtargets
2251 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2253 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2254 source, subtargets, generate);
2259 gen_rtx_SET (VOIDmode, target,
2262 gen_rtx_LSHIFTRT (mode, source,
2273 /* If we have IOR or XOR, and the constant can be loaded in a
2274 single instruction, and we can find a temporary to put it in,
2275 then this can be done in two instructions instead of 3-4. */
2277 /* TARGET can't be NULL if SUBTARGETS is 0 */
2278 || (reload_completed && !reg_mentioned_p (target, source)))
2280 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2284 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2286 emit_constant_insn (cond,
2287 gen_rtx_SET (VOIDmode, sub,
2289 emit_constant_insn (cond,
2290 gen_rtx_SET (VOIDmode, target,
2291 gen_rtx_fmt_ee (code, mode,
2301 if (set_sign_bit_copies > 8
2302 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2306 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2307 rtx shift = GEN_INT (set_sign_bit_copies);
2311 gen_rtx_SET (VOIDmode, sub,
2313 gen_rtx_ASHIFT (mode,
2318 gen_rtx_SET (VOIDmode, target,
2320 gen_rtx_LSHIFTRT (mode, sub,
2326 if (set_zero_bit_copies > 8
2327 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2331 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2332 rtx shift = GEN_INT (set_zero_bit_copies);
2336 gen_rtx_SET (VOIDmode, sub,
2338 gen_rtx_LSHIFTRT (mode,
2343 gen_rtx_SET (VOIDmode, target,
2345 gen_rtx_ASHIFT (mode, sub,
2351 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2355 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2356 emit_constant_insn (cond,
2357 gen_rtx_SET (VOIDmode, sub,
2358 gen_rtx_NOT (mode, source)));
2361 sub = gen_reg_rtx (mode);
2362 emit_constant_insn (cond,
2363 gen_rtx_SET (VOIDmode, sub,
2364 gen_rtx_AND (mode, source,
2366 emit_constant_insn (cond,
2367 gen_rtx_SET (VOIDmode, target,
2368 gen_rtx_NOT (mode, sub)));
2375 /* See if two shifts will do 2 or more insn's worth of work. */
2376 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2378 HOST_WIDE_INT shift_mask = ((0xffffffff
2379 << (32 - clear_sign_bit_copies))
2382 if ((remainder | shift_mask) != 0xffffffff)
2386 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2387 insns = arm_gen_constant (AND, mode, cond,
2388 remainder | shift_mask,
2389 new_src, source, subtargets, 1);
2394 rtx targ = subtargets ? NULL_RTX : target;
2395 insns = arm_gen_constant (AND, mode, cond,
2396 remainder | shift_mask,
2397 targ, source, subtargets, 0);
2403 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2404 rtx shift = GEN_INT (clear_sign_bit_copies);
2406 emit_insn (gen_ashlsi3 (new_src, source, shift));
2407 emit_insn (gen_lshrsi3 (target, new_src, shift));
2413 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2415 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2417 if ((remainder | shift_mask) != 0xffffffff)
2421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2423 insns = arm_gen_constant (AND, mode, cond,
2424 remainder | shift_mask,
2425 new_src, source, subtargets, 1);
2430 rtx targ = subtargets ? NULL_RTX : target;
2432 insns = arm_gen_constant (AND, mode, cond,
2433 remainder | shift_mask,
2434 targ, source, subtargets, 0);
2440 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2441 rtx shift = GEN_INT (clear_zero_bit_copies);
2443 emit_insn (gen_lshrsi3 (new_src, source, shift));
2444 emit_insn (gen_ashlsi3 (target, new_src, shift));
2456 for (i = 0; i < 32; i++)
2457 if (remainder & (1 << i))
2460 if (code == AND || (can_invert && num_bits_set > 16))
2461 remainder = (~remainder) & 0xffffffff;
2462 else if (code == PLUS && num_bits_set > 16)
2463 remainder = (-remainder) & 0xffffffff;
2470 /* Now try and find a way of doing the job in either two or three
2472 We start by looking for the largest block of zeros that are aligned on
2473 a 2-bit boundary, we then fill up the temps, wrapping around to the
2474 top of the word when we drop off the bottom.
2475 In the worst case this code should produce no more than four insns.
2476 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2477 best place to start. */
2479 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2485 int best_consecutive_zeros = 0;
2487 for (i = 0; i < 32; i += 2)
2489 int consecutive_zeros = 0;
2491 if (!(remainder & (3 << i)))
2493 while ((i < 32) && !(remainder & (3 << i)))
2495 consecutive_zeros += 2;
2498 if (consecutive_zeros > best_consecutive_zeros)
2500 best_consecutive_zeros = consecutive_zeros;
2501 best_start = i - consecutive_zeros;
2507 /* So long as it won't require any more insns to do so, it's
2508 desirable to emit a small constant (in bits 0...9) in the last
2509 insn. This way there is more chance that it can be combined with
2510 a later addressing insn to form a pre-indexed load or store
2511 operation. Consider:
2513 *((volatile int *)0xe0000100) = 1;
2514 *((volatile int *)0xe0000110) = 2;
2516 We want this to wind up as:
2520 str rB, [rA, #0x100]
2522 str rB, [rA, #0x110]
2524 rather than having to synthesize both large constants from scratch.
2526 Therefore, we calculate how many insns would be required to emit
2527 the constant starting from `best_start', and also starting from
2528 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2529 yield a shorter sequence, we may as well use zero. */
2531 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2532 && (count_insns_for_constant (remainder, 0) <=
2533 count_insns_for_constant (remainder, best_start)))
2537 /* Now start emitting the insns. */
2545 if (remainder & (3 << (i - 2)))
2550 temp1 = remainder & ((0x0ff << end)
2551 | ((i < end) ? (0xff >> (32 - end)) : 0));
2552 remainder &= ~temp1;
2556 rtx new_src, temp1_rtx;
2558 if (code == SET || code == MINUS)
2560 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2561 if (can_invert && code != MINUS)
2566 if (remainder && subtargets)
2567 new_src = gen_reg_rtx (mode);
2572 else if (can_negate)
2576 temp1 = trunc_int_for_mode (temp1, mode);
2577 temp1_rtx = GEN_INT (temp1);
2581 else if (code == MINUS)
2582 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2584 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2586 emit_constant_insn (cond,
2587 gen_rtx_SET (VOIDmode, new_src,
2597 else if (code == MINUS)
2606 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2619 /* Canonicalize a comparison so that we are more likely to recognize it.
2620 This can be done for a few constant compares, where we can make the
2621 immediate value easier to load. */
2624 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2627 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2628 unsigned HOST_WIDE_INT maxval;
2629 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2640 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2642 *op1 = GEN_INT (i + 1);
2643 return code == GT ? GE : LT;
2650 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2652 *op1 = GEN_INT (i - 1);
2653 return code == GE ? GT : LE;
2659 if (i != ~((unsigned HOST_WIDE_INT) 0)
2660 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2662 *op1 = GEN_INT (i + 1);
2663 return code == GTU ? GEU : LTU;
2670 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2672 *op1 = GEN_INT (i - 1);
2673 return code == GEU ? GTU : LEU;
2685 /* Define how to find the value returned by a function. */
2688 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2690 enum machine_mode mode;
2691 int unsignedp ATTRIBUTE_UNUSED;
2692 rtx r ATTRIBUTE_UNUSED;
2694 mode = TYPE_MODE (type);
2695 /* Promote integer types. */
2696 if (INTEGRAL_TYPE_P (type))
2697 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2699 /* Promotes small structs returned in a register to full-word size
2700 for big-endian AAPCS. */
2701 if (arm_return_in_msb (type))
2703 HOST_WIDE_INT size = int_size_in_bytes (type);
2704 if (size % UNITS_PER_WORD != 0)
2706 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2707 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2711 return LIBCALL_VALUE(mode);
2714 /* Determine the amount of memory needed to store the possible return
2715 registers of an untyped call. */
2717 arm_apply_result_size (void)
2723 if (TARGET_HARD_FLOAT_ABI)
2727 if (TARGET_MAVERICK)
2730 if (TARGET_IWMMXT_ABI)
2737 /* Decide whether a type should be returned in memory (true)
2738 or in a register (false). This is called by the macro
2739 RETURN_IN_MEMORY. */
2741 arm_return_in_memory (const_tree type)
2745 size = int_size_in_bytes (type);
2747 /* Vector values should be returned using ARM registers, not memory (unless
2748 they're over 16 bytes, which will break since we only have four
2749 call-clobbered registers to play with). */
2750 if (TREE_CODE (type) == VECTOR_TYPE)
2751 return (size < 0 || size > (4 * UNITS_PER_WORD));
2753 if (!AGGREGATE_TYPE_P (type) &&
2754 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2755 /* All simple types are returned in registers.
2756 For AAPCS, complex types are treated the same as aggregates. */
2759 if (arm_abi != ARM_ABI_APCS)
2761 /* ATPCS and later return aggregate types in memory only if they are
2762 larger than a word (or are variable size). */
2763 return (size < 0 || size > UNITS_PER_WORD);
2766 /* For the arm-wince targets we choose to be compatible with Microsoft's
2767 ARM and Thumb compilers, which always return aggregates in memory. */
2769 /* All structures/unions bigger than one word are returned in memory.
2770 Also catch the case where int_size_in_bytes returns -1. In this case
2771 the aggregate is either huge or of variable size, and in either case
2772 we will want to return it via memory and not in a register. */
2773 if (size < 0 || size > UNITS_PER_WORD)
2776 if (TREE_CODE (type) == RECORD_TYPE)
2780 /* For a struct the APCS says that we only return in a register
2781 if the type is 'integer like' and every addressable element
2782 has an offset of zero. For practical purposes this means
2783 that the structure can have at most one non bit-field element
2784 and that this element must be the first one in the structure. */
2786 /* Find the first field, ignoring non FIELD_DECL things which will
2787 have been created by C++. */
2788 for (field = TYPE_FIELDS (type);
2789 field && TREE_CODE (field) != FIELD_DECL;
2790 field = TREE_CHAIN (field))
2794 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2796 /* Check that the first field is valid for returning in a register. */
2798 /* ... Floats are not allowed */
2799 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2802 /* ... Aggregates that are not themselves valid for returning in
2803 a register are not allowed. */
2804 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2807 /* Now check the remaining fields, if any. Only bitfields are allowed,
2808 since they are not addressable. */
2809 for (field = TREE_CHAIN (field);
2811 field = TREE_CHAIN (field))
2813 if (TREE_CODE (field) != FIELD_DECL)
2816 if (!DECL_BIT_FIELD_TYPE (field))
2823 if (TREE_CODE (type) == UNION_TYPE)
2827 /* Unions can be returned in registers if every element is
2828 integral, or can be returned in an integer register. */
2829 for (field = TYPE_FIELDS (type);
2831 field = TREE_CHAIN (field))
2833 if (TREE_CODE (field) != FIELD_DECL)
2836 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2839 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2845 #endif /* not ARM_WINCE */
2847 /* Return all other types in memory. */
2851 /* Indicate whether or not words of a double are in big-endian order. */
2854 arm_float_words_big_endian (void)
2856 if (TARGET_MAVERICK)
2859 /* For FPA, float words are always big-endian. For VFP, floats words
2860 follow the memory system mode. */
2868 return (TARGET_BIG_END ? 1 : 0);
2873 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2874 for a call to a function whose data type is FNTYPE.
2875 For a library call, FNTYPE is NULL. */
2877 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2878 rtx libname ATTRIBUTE_UNUSED,
2879 tree fndecl ATTRIBUTE_UNUSED)
2881 /* On the ARM, the offset starts at 0. */
2883 pcum->iwmmxt_nregs = 0;
2884 pcum->can_split = true;
2886 /* Varargs vectors are treated the same as long long.
2887 named_count avoids having to change the way arm handles 'named' */
2888 pcum->named_count = 0;
2891 if (TARGET_REALLY_IWMMXT && fntype)
2895 for (fn_arg = TYPE_ARG_TYPES (fntype);
2897 fn_arg = TREE_CHAIN (fn_arg))
2898 pcum->named_count += 1;
2900 if (! pcum->named_count)
2901 pcum->named_count = INT_MAX;
2906 /* Return true if mode/type need doubleword alignment. */
2908 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2910 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2911 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2915 /* Determine where to put an argument to a function.
2916 Value is zero to push the argument on the stack,
2917 or a hard register in which to store the argument.
2919 MODE is the argument's machine mode.
2920 TYPE is the data type of the argument (as a tree).
2921 This is null for libcalls where that information may
2923 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2924 the preceding args and about the function being called.
2925 NAMED is nonzero if this argument is a named parameter
2926 (otherwise it is an extra parameter matching an ellipsis). */
2929 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2930 tree type, int named)
2934 /* Varargs vectors are treated the same as long long.
2935 named_count avoids having to change the way arm handles 'named' */
2936 if (TARGET_IWMMXT_ABI
2937 && arm_vector_mode_supported_p (mode)
2938 && pcum->named_count > pcum->nargs + 1)
2940 if (pcum->iwmmxt_nregs <= 9)
2941 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2944 pcum->can_split = false;
2949 /* Put doubleword aligned quantities in even register pairs. */
2951 && ARM_DOUBLEWORD_ALIGN
2952 && arm_needs_doubleword_align (mode, type))
2955 if (mode == VOIDmode)
2956 /* Pick an arbitrary value for operand 2 of the call insn. */
2959 /* Only allow splitting an arg between regs and memory if all preceding
2960 args were allocated to regs. For args passed by reference we only count
2961 the reference pointer. */
2962 if (pcum->can_split)
2965 nregs = ARM_NUM_REGS2 (mode, type);
2967 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2970 return gen_rtx_REG (mode, pcum->nregs);
2974 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2975 tree type, bool named ATTRIBUTE_UNUSED)
2977 int nregs = pcum->nregs;
2979 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
2982 if (NUM_ARG_REGS > nregs
2983 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2985 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2990 /* Variable sized types are passed by reference. This is a GCC
2991 extension to the ARM ABI. */
2994 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2995 enum machine_mode mode ATTRIBUTE_UNUSED,
2996 const_tree type, bool named ATTRIBUTE_UNUSED)
2998 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3001 /* Encode the current state of the #pragma [no_]long_calls. */
3004 OFF, /* No #pragma [no_]long_calls is in effect. */
3005 LONG, /* #pragma long_calls is in effect. */
3006 SHORT /* #pragma no_long_calls is in effect. */
3009 static arm_pragma_enum arm_pragma_long_calls = OFF;
3012 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3014 arm_pragma_long_calls = LONG;
3018 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3020 arm_pragma_long_calls = SHORT;
3024 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3026 arm_pragma_long_calls = OFF;
3029 /* Table of machine attributes. */
3030 const struct attribute_spec arm_attribute_table[] =
3032 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3033 /* Function calls made to this symbol must be done indirectly, because
3034 it may lie outside of the 26 bit addressing range of a normal function
3036 { "long_call", 0, 0, false, true, true, NULL },
3037 /* Whereas these functions are always known to reside within the 26 bit
3038 addressing range. */
3039 { "short_call", 0, 0, false, true, true, NULL },
3040 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3041 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3042 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3043 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3045 /* ARM/PE has three new attributes:
3047 dllexport - for exporting a function/variable that will live in a dll
3048 dllimport - for importing a function/variable from a dll
3050 Microsoft allows multiple declspecs in one __declspec, separating
3051 them with spaces. We do NOT support this. Instead, use __declspec
3054 { "dllimport", 0, 0, true, false, false, NULL },
3055 { "dllexport", 0, 0, true, false, false, NULL },
3056 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3057 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3058 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3059 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3060 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3062 { NULL, 0, 0, false, false, false, NULL }
3065 /* Handle an attribute requiring a FUNCTION_DECL;
3066 arguments as in struct attribute_spec.handler. */
3068 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3069 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3071 if (TREE_CODE (*node) != FUNCTION_DECL)
3073 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3074 IDENTIFIER_POINTER (name));
3075 *no_add_attrs = true;
3081 /* Handle an "interrupt" or "isr" attribute;
3082 arguments as in struct attribute_spec.handler. */
3084 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3089 if (TREE_CODE (*node) != FUNCTION_DECL)
3091 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3092 IDENTIFIER_POINTER (name));
3093 *no_add_attrs = true;
3095 /* FIXME: the argument if any is checked for type attributes;
3096 should it be checked for decl ones? */
3100 if (TREE_CODE (*node) == FUNCTION_TYPE
3101 || TREE_CODE (*node) == METHOD_TYPE)
3103 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3105 warning (OPT_Wattributes, "%qs attribute ignored",
3106 IDENTIFIER_POINTER (name));
3107 *no_add_attrs = true;
3110 else if (TREE_CODE (*node) == POINTER_TYPE
3111 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3112 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3113 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3115 *node = build_variant_type_copy (*node);
3116 TREE_TYPE (*node) = build_type_attribute_variant
3118 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3119 *no_add_attrs = true;
3123 /* Possibly pass this attribute on from the type to a decl. */
3124 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3125 | (int) ATTR_FLAG_FUNCTION_NEXT
3126 | (int) ATTR_FLAG_ARRAY_NEXT))
3128 *no_add_attrs = true;
3129 return tree_cons (name, args, NULL_TREE);
3133 warning (OPT_Wattributes, "%qs attribute ignored",
3134 IDENTIFIER_POINTER (name));
3142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3143 /* Handle the "notshared" attribute. This attribute is another way of
3144 requesting hidden visibility. ARM's compiler supports
3145 "__declspec(notshared)"; we support the same thing via an
3149 arm_handle_notshared_attribute (tree *node,
3150 tree name ATTRIBUTE_UNUSED,
3151 tree args ATTRIBUTE_UNUSED,
3152 int flags ATTRIBUTE_UNUSED,
3155 tree decl = TYPE_NAME (*node);
3159 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3160 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3161 *no_add_attrs = false;
3167 /* Return 0 if the attributes for two types are incompatible, 1 if they
3168 are compatible, and 2 if they are nearly compatible (which causes a
3169 warning to be generated). */
3171 arm_comp_type_attributes (const_tree type1, const_tree type2)
3175 /* Check for mismatch of non-default calling convention. */
3176 if (TREE_CODE (type1) != FUNCTION_TYPE)
3179 /* Check for mismatched call attributes. */
3180 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3181 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3182 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3183 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3185 /* Only bother to check if an attribute is defined. */
3186 if (l1 | l2 | s1 | s2)
3188 /* If one type has an attribute, the other must have the same attribute. */
3189 if ((l1 != l2) || (s1 != s2))
3192 /* Disallow mixed attributes. */
3193 if ((l1 & s2) || (l2 & s1))
3197 /* Check for mismatched ISR attribute. */
3198 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3200 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3201 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3203 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3210 /* Assigns default attributes to newly defined type. This is used to
3211 set short_call/long_call attributes for function types of
3212 functions defined inside corresponding #pragma scopes. */
3214 arm_set_default_type_attributes (tree type)
3216 /* Add __attribute__ ((long_call)) to all functions, when
3217 inside #pragma long_calls or __attribute__ ((short_call)),
3218 when inside #pragma no_long_calls. */
3219 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3221 tree type_attr_list, attr_name;
3222 type_attr_list = TYPE_ATTRIBUTES (type);
3224 if (arm_pragma_long_calls == LONG)
3225 attr_name = get_identifier ("long_call");
3226 else if (arm_pragma_long_calls == SHORT)
3227 attr_name = get_identifier ("short_call");
3231 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3232 TYPE_ATTRIBUTES (type) = type_attr_list;
3236 /* Return true if DECL is known to be linked into section SECTION. */
3239 arm_function_in_section_p (tree decl, section *section)
3241 /* We can only be certain about functions defined in the same
3242 compilation unit. */
3243 if (!TREE_STATIC (decl))
3246 /* Make sure that SYMBOL always binds to the definition in this
3247 compilation unit. */
3248 if (!targetm.binds_local_p (decl))
3251 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3252 if (!DECL_SECTION_NAME (decl))
3254 /* Only cater for unit-at-a-time mode, where we know that the user
3255 cannot later specify a section for DECL. */
3256 if (!flag_unit_at_a_time)
3259 /* Make sure that we will not create a unique section for DECL. */
3260 if (flag_function_sections || DECL_ONE_ONLY (decl))
3264 return function_section (decl) == section;
3267 /* Return nonzero if a 32-bit "long_call" should be generated for
3268 a call from the current function to DECL. We generate a long_call
3271 a. has an __attribute__((long call))
3272 or b. is within the scope of a #pragma long_calls
3273 or c. the -mlong-calls command line switch has been specified
3275 However we do not generate a long call if the function:
3277 d. has an __attribute__ ((short_call))
3278 or e. is inside the scope of a #pragma no_long_calls
3279 or f. is defined in the same section as the current function. */
3282 arm_is_long_call_p (tree decl)
3287 return TARGET_LONG_CALLS;
3289 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3290 if (lookup_attribute ("short_call", attrs))
3293 /* For "f", be conservative, and only cater for cases in which the
3294 whole of the current function is placed in the same section. */
3295 if (!flag_reorder_blocks_and_partition
3296 && arm_function_in_section_p (decl, current_function_section ()))
3299 if (lookup_attribute ("long_call", attrs))
3302 return TARGET_LONG_CALLS;
3305 /* Return nonzero if it is ok to make a tail-call to DECL. */
3307 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3309 unsigned long func_type;
3311 if (cfun->machine->sibcall_blocked)
3314 /* Never tailcall something for which we have no decl, or if we
3315 are in Thumb mode. */
3316 if (decl == NULL || TARGET_THUMB)
3319 /* The PIC register is live on entry to VxWorks PLT entries, so we
3320 must make the call before restoring the PIC register. */
3321 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3324 /* Cannot tail-call to long calls, since these are out of range of
3325 a branch instruction. */
3326 if (arm_is_long_call_p (decl))
3329 /* If we are interworking and the function is not declared static
3330 then we can't tail-call it unless we know that it exists in this
3331 compilation unit (since it might be a Thumb routine). */
3332 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3335 func_type = arm_current_func_type ();
3336 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3337 if (IS_INTERRUPT (func_type))
3340 /* Never tailcall if function may be called with a misaligned SP. */
3341 if (IS_STACKALIGN (func_type))
3344 /* Everything else is ok. */
3349 /* Addressing mode support functions. */
3351 /* Return nonzero if X is a legitimate immediate operand when compiling
3352 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3354 legitimate_pic_operand_p (rtx x)
3356 if (GET_CODE (x) == SYMBOL_REF
3357 || (GET_CODE (x) == CONST
3358 && GET_CODE (XEXP (x, 0)) == PLUS
3359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3365 /* Record that the current function needs a PIC register. Initialize
3366 cfun->machine->pic_reg if we have not already done so. */
3369 require_pic_register (void)
3371 /* A lot of the logic here is made obscure by the fact that this
3372 routine gets called as part of the rtx cost estimation process.
3373 We don't want those calls to affect any assumptions about the real
3374 function; and further, we can't call entry_of_function() until we
3375 start the real expansion process. */
3376 if (!current_function_uses_pic_offset_table)
3378 gcc_assert (can_create_pseudo_p ());
3379 if (arm_pic_register != INVALID_REGNUM)
3381 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3383 /* Play games to avoid marking the function as needing pic
3384 if we are being called as part of the cost-estimation
3386 if (current_ir_type () != IR_GIMPLE)
3387 current_function_uses_pic_offset_table = 1;
3393 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3395 /* Play games to avoid marking the function as needing pic
3396 if we are being called as part of the cost-estimation
3398 if (current_ir_type () != IR_GIMPLE)
3400 current_function_uses_pic_offset_table = 1;
3403 arm_load_pic_register (0UL);
3407 emit_insn_after (seq, entry_of_function ());
3414 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3416 if (GET_CODE (orig) == SYMBOL_REF
3417 || GET_CODE (orig) == LABEL_REF)
3419 rtx pic_ref, address;
3423 /* If this function doesn't have a pic register, create one now. */
3424 require_pic_register ();
3428 gcc_assert (can_create_pseudo_p ());
3429 reg = gen_reg_rtx (Pmode);
3435 address = gen_reg_rtx (Pmode);
3440 emit_insn (gen_pic_load_addr_arm (address, orig));
3441 else if (TARGET_THUMB2)
3442 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3443 else /* TARGET_THUMB1 */
3444 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3446 /* VxWorks does not impose a fixed gap between segments; the run-time
3447 gap can be different from the object-file gap. We therefore can't
3448 use GOTOFF unless we are absolutely sure that the symbol is in the
3449 same segment as the GOT. Unfortunately, the flexibility of linker
3450 scripts means that we can't be sure of that in general, so assume
3451 that GOTOFF is never valid on VxWorks. */
3452 if ((GET_CODE (orig) == LABEL_REF
3453 || (GET_CODE (orig) == SYMBOL_REF &&
3454 SYMBOL_REF_LOCAL_P (orig)))
3456 && !TARGET_VXWORKS_RTP)
3457 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3460 pic_ref = gen_const_mem (Pmode,
3461 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3465 insn = emit_move_insn (reg, pic_ref);
3467 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3469 set_unique_reg_note (insn, REG_EQUAL, orig);
3473 else if (GET_CODE (orig) == CONST)
3477 if (GET_CODE (XEXP (orig, 0)) == PLUS
3478 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3481 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3482 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3487 gcc_assert (can_create_pseudo_p ());
3488 reg = gen_reg_rtx (Pmode);
3491 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3493 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3494 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3495 base == reg ? 0 : reg);
3497 if (GET_CODE (offset) == CONST_INT)
3499 /* The base register doesn't really matter, we only want to
3500 test the index for the appropriate mode. */
3501 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3503 gcc_assert (can_create_pseudo_p ());
3504 offset = force_reg (Pmode, offset);
3507 if (GET_CODE (offset) == CONST_INT)
3508 return plus_constant (base, INTVAL (offset));
3511 if (GET_MODE_SIZE (mode) > 4
3512 && (GET_MODE_CLASS (mode) == MODE_INT
3513 || TARGET_SOFT_FLOAT))
3515 emit_insn (gen_addsi3 (reg, base, offset));
3519 return gen_rtx_PLUS (Pmode, base, offset);
3526 /* Find a spare register to use during the prolog of a function. */
3529 thumb_find_work_register (unsigned long pushed_regs_mask)
3533 /* Check the argument registers first as these are call-used. The
3534 register allocation order means that sometimes r3 might be used
3535 but earlier argument registers might not, so check them all. */
3536 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3537 if (!df_regs_ever_live_p (reg))
3540 /* Before going on to check the call-saved registers we can try a couple
3541 more ways of deducing that r3 is available. The first is when we are
3542 pushing anonymous arguments onto the stack and we have less than 4
3543 registers worth of fixed arguments(*). In this case r3 will be part of
3544 the variable argument list and so we can be sure that it will be
3545 pushed right at the start of the function. Hence it will be available
3546 for the rest of the prologue.
3547 (*): ie current_function_pretend_args_size is greater than 0. */
3548 if (cfun->machine->uses_anonymous_args
3549 && current_function_pretend_args_size > 0)
3550 return LAST_ARG_REGNUM;
3552 /* The other case is when we have fixed arguments but less than 4 registers
3553 worth. In this case r3 might be used in the body of the function, but
3554 it is not being used to convey an argument into the function. In theory
3555 we could just check current_function_args_size to see how many bytes are
3556 being passed in argument registers, but it seems that it is unreliable.
3557 Sometimes it will have the value 0 when in fact arguments are being
3558 passed. (See testcase execute/20021111-1.c for an example). So we also
3559 check the args_info.nregs field as well. The problem with this field is
3560 that it makes no allowances for arguments that are passed to the
3561 function but which are not used. Hence we could miss an opportunity
3562 when a function has an unused argument in r3. But it is better to be
3563 safe than to be sorry. */
3564 if (! cfun->machine->uses_anonymous_args
3565 && current_function_args_size >= 0
3566 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3567 && cfun->args_info.nregs < 4)
3568 return LAST_ARG_REGNUM;
3570 /* Otherwise look for a call-saved register that is going to be pushed. */
3571 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3572 if (pushed_regs_mask & (1 << reg))
3577 /* Thumb-2 can use high regs. */
3578 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3579 if (pushed_regs_mask & (1 << reg))
3582 /* Something went wrong - thumb_compute_save_reg_mask()
3583 should have arranged for a suitable register to be pushed. */
3587 static GTY(()) int pic_labelno;
3589 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3593 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3595 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3596 rtx global_offset_table;
3598 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3601 gcc_assert (flag_pic);
3603 pic_reg = cfun->machine->pic_reg;
3604 if (TARGET_VXWORKS_RTP)
3606 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3607 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3608 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3610 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3612 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3613 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3617 /* We use an UNSPEC rather than a LABEL_REF because this label
3618 never appears in the code stream. */
3620 labelno = GEN_INT (pic_labelno++);
3621 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3622 l1 = gen_rtx_CONST (VOIDmode, l1);
3625 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3626 /* On the ARM the PC register contains 'dot + 8' at the time of the
3627 addition, on the Thumb it is 'dot + 4'. */
3628 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3631 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3632 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3635 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3637 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3638 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3642 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3643 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3645 else if (TARGET_THUMB2)
3647 /* Thumb-2 only allows very limited access to the PC. Calculate the
3648 address in a temporary register. */
3649 if (arm_pic_register != INVALID_REGNUM)
3651 pic_tmp = gen_rtx_REG (SImode,
3652 thumb_find_work_register (saved_regs));
3656 gcc_assert (can_create_pseudo_p ());
3657 pic_tmp = gen_reg_rtx (Pmode);
3660 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3661 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3662 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3664 else /* TARGET_THUMB1 */
3666 if (arm_pic_register != INVALID_REGNUM
3667 && REGNO (pic_reg) > LAST_LO_REGNUM)
3669 /* We will have pushed the pic register, so we should always be
3670 able to find a work register. */
3671 pic_tmp = gen_rtx_REG (SImode,
3672 thumb_find_work_register (saved_regs));
3673 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3674 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3677 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3678 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3682 /* Need to emit this whether or not we obey regdecls,
3683 since setjmp/longjmp can cause life info to screw up. */
3684 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
3688 /* Return nonzero if X is valid as an ARM state addressing register. */
3690 arm_address_register_rtx_p (rtx x, int strict_p)
3694 if (GET_CODE (x) != REG)
3700 return ARM_REGNO_OK_FOR_BASE_P (regno);
3702 return (regno <= LAST_ARM_REGNUM
3703 || regno >= FIRST_PSEUDO_REGISTER
3704 || regno == FRAME_POINTER_REGNUM
3705 || regno == ARG_POINTER_REGNUM);
3708 /* Return TRUE if this rtx is the difference of a symbol and a label,
3709 and will reduce to a PC-relative relocation in the object file.
3710 Expressions like this can be left alone when generating PIC, rather
3711 than forced through the GOT. */
3713 pcrel_constant_p (rtx x)
3715 if (GET_CODE (x) == MINUS)
3716 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3721 /* Return nonzero if X is a valid ARM state address operand. */
3723 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3727 enum rtx_code code = GET_CODE (x);
3729 if (arm_address_register_rtx_p (x, strict_p))
3732 use_ldrd = (TARGET_LDRD
3734 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3736 if (code == POST_INC || code == PRE_DEC
3737 || ((code == PRE_INC || code == POST_DEC)
3738 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3739 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3741 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3742 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3743 && GET_CODE (XEXP (x, 1)) == PLUS
3744 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3746 rtx addend = XEXP (XEXP (x, 1), 1);
3748 /* Don't allow ldrd post increment by register because it's hard
3749 to fixup invalid register choices. */
3751 && GET_CODE (x) == POST_MODIFY
3752 && GET_CODE (addend) == REG)
3755 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3756 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3759 /* After reload constants split into minipools will have addresses
3760 from a LABEL_REF. */
3761 else if (reload_completed
3762 && (code == LABEL_REF
3764 && GET_CODE (XEXP (x, 0)) == PLUS
3765 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3766 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3769 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3772 else if (code == PLUS)
3774 rtx xop0 = XEXP (x, 0);
3775 rtx xop1 = XEXP (x, 1);
3777 return ((arm_address_register_rtx_p (xop0, strict_p)
3778 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3779 || (arm_address_register_rtx_p (xop1, strict_p)
3780 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3784 /* Reload currently can't handle MINUS, so disable this for now */
3785 else if (GET_CODE (x) == MINUS)
3787 rtx xop0 = XEXP (x, 0);
3788 rtx xop1 = XEXP (x, 1);
3790 return (arm_address_register_rtx_p (xop0, strict_p)
3791 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3795 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3796 && code == SYMBOL_REF
3797 && CONSTANT_POOL_ADDRESS_P (x)
3799 && symbol_mentioned_p (get_pool_constant (x))
3800 && ! pcrel_constant_p (get_pool_constant (x))))
3806 /* Return nonzero if X is a valid Thumb-2 address operand. */
3808 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3811 enum rtx_code code = GET_CODE (x);
3813 if (arm_address_register_rtx_p (x, strict_p))
3816 use_ldrd = (TARGET_LDRD
3818 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3820 if (code == POST_INC || code == PRE_DEC
3821 || ((code == PRE_INC || code == POST_DEC)
3822 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3823 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3825 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3826 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3827 && GET_CODE (XEXP (x, 1)) == PLUS
3828 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3830 /* Thumb-2 only has autoincrement by constant. */
3831 rtx addend = XEXP (XEXP (x, 1), 1);
3832 HOST_WIDE_INT offset;
3834 if (GET_CODE (addend) != CONST_INT)
3837 offset = INTVAL(addend);
3838 if (GET_MODE_SIZE (mode) <= 4)
3839 return (offset > -256 && offset < 256);
3841 return (use_ldrd && offset > -1024 && offset < 1024
3842 && (offset & 3) == 0);
3845 /* After reload constants split into minipools will have addresses
3846 from a LABEL_REF. */
3847 else if (reload_completed
3848 && (code == LABEL_REF
3850 && GET_CODE (XEXP (x, 0)) == PLUS
3851 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3852 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3855 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3858 else if (code == PLUS)
3860 rtx xop0 = XEXP (x, 0);
3861 rtx xop1 = XEXP (x, 1);
3863 return ((arm_address_register_rtx_p (xop0, strict_p)
3864 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3865 || (arm_address_register_rtx_p (xop1, strict_p)
3866 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3869 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3870 && code == SYMBOL_REF
3871 && CONSTANT_POOL_ADDRESS_P (x)
3873 && symbol_mentioned_p (get_pool_constant (x))
3874 && ! pcrel_constant_p (get_pool_constant (x))))
3880 /* Return nonzero if INDEX is valid for an address index operand in
3883 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3886 HOST_WIDE_INT range;
3887 enum rtx_code code = GET_CODE (index);
3889 /* Standard coprocessor addressing modes. */
3890 if (TARGET_HARD_FLOAT
3891 && (TARGET_FPA || TARGET_MAVERICK)
3892 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3893 || (TARGET_MAVERICK && mode == DImode)))
3894 return (code == CONST_INT && INTVAL (index) < 1024
3895 && INTVAL (index) > -1024
3896 && (INTVAL (index) & 3) == 0);
3899 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3900 return (code == CONST_INT
3901 && INTVAL (index) < 1016
3902 && INTVAL (index) > -1024
3903 && (INTVAL (index) & 3) == 0);
3905 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3906 return (code == CONST_INT
3907 && INTVAL (index) < 1024
3908 && INTVAL (index) > -1024
3909 && (INTVAL (index) & 3) == 0);
3911 if (arm_address_register_rtx_p (index, strict_p)
3912 && (GET_MODE_SIZE (mode) <= 4))
3915 if (mode == DImode || mode == DFmode)
3917 if (code == CONST_INT)
3919 HOST_WIDE_INT val = INTVAL (index);
3922 return val > -256 && val < 256;
3924 return val > -4096 && val < 4092;
3927 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3930 if (GET_MODE_SIZE (mode) <= 4
3933 || (mode == QImode && outer == SIGN_EXTEND))))
3937 rtx xiop0 = XEXP (index, 0);
3938 rtx xiop1 = XEXP (index, 1);
3940 return ((arm_address_register_rtx_p (xiop0, strict_p)
3941 && power_of_two_operand (xiop1, SImode))
3942 || (arm_address_register_rtx_p (xiop1, strict_p)
3943 && power_of_two_operand (xiop0, SImode)));
3945 else if (code == LSHIFTRT || code == ASHIFTRT
3946 || code == ASHIFT || code == ROTATERT)
3948 rtx op = XEXP (index, 1);
3950 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3951 && GET_CODE (op) == CONST_INT
3953 && INTVAL (op) <= 31);
3957 /* For ARM v4 we may be doing a sign-extend operation during the
3961 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3967 range = (mode == HImode) ? 4095 : 4096;
3969 return (code == CONST_INT
3970 && INTVAL (index) < range
3971 && INTVAL (index) > -range);
3974 /* Return true if OP is a valid index scaling factor for Thumb-2 address
3975 index operand. i.e. 1, 2, 4 or 8. */
3977 thumb2_index_mul_operand (rtx op)
3981 if (GET_CODE(op) != CONST_INT)
3985 return (val == 1 || val == 2 || val == 4 || val == 8);
3988 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
3990 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
3992 enum rtx_code code = GET_CODE (index);
3994 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
3995 /* Standard coprocessor addressing modes. */
3996 if (TARGET_HARD_FLOAT
3997 && (TARGET_FPA || TARGET_MAVERICK)
3998 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3999 || (TARGET_MAVERICK && mode == DImode)))
4000 return (code == CONST_INT && INTVAL (index) < 1024
4001 && INTVAL (index) > -1024
4002 && (INTVAL (index) & 3) == 0);
4004 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4006 /* For DImode assume values will usually live in core regs
4007 and only allow LDRD addressing modes. */
4008 if (!TARGET_LDRD || mode != DImode)
4009 return (code == CONST_INT
4010 && INTVAL (index) < 1024
4011 && INTVAL (index) > -1024
4012 && (INTVAL (index) & 3) == 0);
4016 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4017 return (code == CONST_INT
4018 && INTVAL (index) < 1016
4019 && INTVAL (index) > -1024
4020 && (INTVAL (index) & 3) == 0);
4022 if (arm_address_register_rtx_p (index, strict_p)
4023 && (GET_MODE_SIZE (mode) <= 4))
4026 if (mode == DImode || mode == DFmode)
4028 HOST_WIDE_INT val = INTVAL (index);
4029 /* ??? Can we assume ldrd for thumb2? */
4030 /* Thumb-2 ldrd only has reg+const addressing modes. */
4031 if (code != CONST_INT)
4034 /* ldrd supports offsets of +-1020.
4035 However the ldr fallback does not. */
4036 return val > -256 && val < 256 && (val & 3) == 0;
4041 rtx xiop0 = XEXP (index, 0);
4042 rtx xiop1 = XEXP (index, 1);
4044 return ((arm_address_register_rtx_p (xiop0, strict_p)
4045 && thumb2_index_mul_operand (xiop1))
4046 || (arm_address_register_rtx_p (xiop1, strict_p)
4047 && thumb2_index_mul_operand (xiop0)));
4049 else if (code == ASHIFT)
4051 rtx op = XEXP (index, 1);
4053 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4054 && GET_CODE (op) == CONST_INT
4056 && INTVAL (op) <= 3);
4059 return (code == CONST_INT
4060 && INTVAL (index) < 4096
4061 && INTVAL (index) > -256);
4064 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4066 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4070 if (GET_CODE (x) != REG)
4076 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4078 return (regno <= LAST_LO_REGNUM
4079 || regno > LAST_VIRTUAL_REGISTER
4080 || regno == FRAME_POINTER_REGNUM
4081 || (GET_MODE_SIZE (mode) >= 4
4082 && (regno == STACK_POINTER_REGNUM
4083 || regno >= FIRST_PSEUDO_REGISTER
4084 || x == hard_frame_pointer_rtx
4085 || x == arg_pointer_rtx)));
4088 /* Return nonzero if x is a legitimate index register. This is the case
4089 for any base register that can access a QImode object. */
4091 thumb1_index_register_rtx_p (rtx x, int strict_p)
4093 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4096 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4098 The AP may be eliminated to either the SP or the FP, so we use the
4099 least common denominator, e.g. SImode, and offsets from 0 to 64.
4101 ??? Verify whether the above is the right approach.
4103 ??? Also, the FP may be eliminated to the SP, so perhaps that
4104 needs special handling also.
4106 ??? Look at how the mips16 port solves this problem. It probably uses
4107 better ways to solve some of these problems.
4109 Although it is not incorrect, we don't accept QImode and HImode
4110 addresses based on the frame pointer or arg pointer until the
4111 reload pass starts. This is so that eliminating such addresses
4112 into stack based ones won't produce impossible code. */
4114 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4116 /* ??? Not clear if this is right. Experiment. */
4117 if (GET_MODE_SIZE (mode) < 4
4118 && !(reload_in_progress || reload_completed)
4119 && (reg_mentioned_p (frame_pointer_rtx, x)
4120 || reg_mentioned_p (arg_pointer_rtx, x)
4121 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4122 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4123 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4124 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4127 /* Accept any base register. SP only in SImode or larger. */
4128 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4131 /* This is PC relative data before arm_reorg runs. */
4132 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4133 && GET_CODE (x) == SYMBOL_REF
4134 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4137 /* This is PC relative data after arm_reorg runs. */
4138 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4139 && (GET_CODE (x) == LABEL_REF
4140 || (GET_CODE (x) == CONST
4141 && GET_CODE (XEXP (x, 0)) == PLUS
4142 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4143 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4146 /* Post-inc indexing only supported for SImode and larger. */
4147 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4148 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4151 else if (GET_CODE (x) == PLUS)
4153 /* REG+REG address can be any two index registers. */
4154 /* We disallow FRAME+REG addressing since we know that FRAME
4155 will be replaced with STACK, and SP relative addressing only
4156 permits SP+OFFSET. */
4157 if (GET_MODE_SIZE (mode) <= 4
4158 && XEXP (x, 0) != frame_pointer_rtx
4159 && XEXP (x, 1) != frame_pointer_rtx
4160 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4161 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4164 /* REG+const has 5-7 bit offset for non-SP registers. */
4165 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4166 || XEXP (x, 0) == arg_pointer_rtx)
4167 && GET_CODE (XEXP (x, 1)) == CONST_INT
4168 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4171 /* REG+const has 10-bit offset for SP, but only SImode and
4172 larger is supported. */
4173 /* ??? Should probably check for DI/DFmode overflow here
4174 just like GO_IF_LEGITIMATE_OFFSET does. */
4175 else if (GET_CODE (XEXP (x, 0)) == REG
4176 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4177 && GET_MODE_SIZE (mode) >= 4
4178 && GET_CODE (XEXP (x, 1)) == CONST_INT
4179 && INTVAL (XEXP (x, 1)) >= 0
4180 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4181 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4184 else if (GET_CODE (XEXP (x, 0)) == REG
4185 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4186 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4187 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4188 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4189 && GET_MODE_SIZE (mode) >= 4
4190 && GET_CODE (XEXP (x, 1)) == CONST_INT
4191 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4195 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4196 && GET_MODE_SIZE (mode) == 4
4197 && GET_CODE (x) == SYMBOL_REF
4198 && CONSTANT_POOL_ADDRESS_P (x)
4200 && symbol_mentioned_p (get_pool_constant (x))
4201 && ! pcrel_constant_p (get_pool_constant (x))))
4207 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4208 instruction of mode MODE. */
4210 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4212 switch (GET_MODE_SIZE (mode))
4215 return val >= 0 && val < 32;
4218 return val >= 0 && val < 64 && (val & 1) == 0;
4222 && (val + GET_MODE_SIZE (mode)) <= 128
4227 /* Build the SYMBOL_REF for __tls_get_addr. */
4229 static GTY(()) rtx tls_get_addr_libfunc;
4232 get_tls_get_addr (void)
4234 if (!tls_get_addr_libfunc)
4235 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4236 return tls_get_addr_libfunc;
4240 arm_load_tp (rtx target)
4243 target = gen_reg_rtx (SImode);
4247 /* Can return in any reg. */
4248 emit_insn (gen_load_tp_hard (target));
4252 /* Always returned in r0. Immediately copy the result into a pseudo,
4253 otherwise other uses of r0 (e.g. setting up function arguments) may
4254 clobber the value. */
4258 emit_insn (gen_load_tp_soft ());
4260 tmp = gen_rtx_REG (SImode, 0);
4261 emit_move_insn (target, tmp);
4267 load_tls_operand (rtx x, rtx reg)
4271 if (reg == NULL_RTX)
4272 reg = gen_reg_rtx (SImode);
4274 tmp = gen_rtx_CONST (SImode, x);
4276 emit_move_insn (reg, tmp);
4282 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4284 rtx insns, label, labelno, sum;
4288 labelno = GEN_INT (pic_labelno++);
4289 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4290 label = gen_rtx_CONST (VOIDmode, label);
4292 sum = gen_rtx_UNSPEC (Pmode,
4293 gen_rtvec (4, x, GEN_INT (reloc), label,
4294 GEN_INT (TARGET_ARM ? 8 : 4)),
4296 reg = load_tls_operand (sum, reg);
4299 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4300 else if (TARGET_THUMB2)
4303 /* Thumb-2 only allows very limited access to the PC. Calculate
4304 the address in a temporary register. */
4305 tmp = gen_reg_rtx (SImode);
4306 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4307 emit_insn (gen_addsi3(reg, reg, tmp));
4309 else /* TARGET_THUMB1 */
4310 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4312 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4313 Pmode, 1, reg, Pmode);
4315 insns = get_insns ();
4322 legitimize_tls_address (rtx x, rtx reg)
4324 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4325 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4329 case TLS_MODEL_GLOBAL_DYNAMIC:
4330 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4331 dest = gen_reg_rtx (Pmode);
4332 emit_libcall_block (insns, dest, ret, x);
4335 case TLS_MODEL_LOCAL_DYNAMIC:
4336 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4338 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4339 share the LDM result with other LD model accesses. */
4340 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4342 dest = gen_reg_rtx (Pmode);
4343 emit_libcall_block (insns, dest, ret, eqv);
4345 /* Load the addend. */
4346 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4348 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4349 return gen_rtx_PLUS (Pmode, dest, addend);
4351 case TLS_MODEL_INITIAL_EXEC:
4352 labelno = GEN_INT (pic_labelno++);
4353 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4354 label = gen_rtx_CONST (VOIDmode, label);
4355 sum = gen_rtx_UNSPEC (Pmode,
4356 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4357 GEN_INT (TARGET_ARM ? 8 : 4)),
4359 reg = load_tls_operand (sum, reg);
4362 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4363 else if (TARGET_THUMB2)
4366 /* Thumb-2 only allows very limited access to the PC. Calculate
4367 the address in a temporary register. */
4368 tmp = gen_reg_rtx (SImode);
4369 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4370 emit_insn (gen_addsi3(reg, reg, tmp));
4371 emit_move_insn (reg, gen_const_mem (SImode, reg));
4375 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4376 emit_move_insn (reg, gen_const_mem (SImode, reg));
4379 tp = arm_load_tp (NULL_RTX);
4381 return gen_rtx_PLUS (Pmode, tp, reg);
4383 case TLS_MODEL_LOCAL_EXEC:
4384 tp = arm_load_tp (NULL_RTX);
4386 reg = gen_rtx_UNSPEC (Pmode,
4387 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4389 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4391 return gen_rtx_PLUS (Pmode, tp, reg);
4398 /* Try machine-dependent ways of modifying an illegitimate address
4399 to be legitimate. If we find one, return the new, valid address. */
4401 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4403 if (arm_tls_symbol_p (x))
4404 return legitimize_tls_address (x, NULL_RTX);
4406 if (GET_CODE (x) == PLUS)
4408 rtx xop0 = XEXP (x, 0);
4409 rtx xop1 = XEXP (x, 1);
4411 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4412 xop0 = force_reg (SImode, xop0);
4414 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4415 xop1 = force_reg (SImode, xop1);
4417 if (ARM_BASE_REGISTER_RTX_P (xop0)
4418 && GET_CODE (xop1) == CONST_INT)
4420 HOST_WIDE_INT n, low_n;
4424 /* VFP addressing modes actually allow greater offsets, but for
4425 now we just stick with the lowest common denominator. */
4427 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4439 low_n = ((mode) == TImode ? 0
4440 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4444 base_reg = gen_reg_rtx (SImode);
4445 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4446 emit_move_insn (base_reg, val);
4447 x = plus_constant (base_reg, low_n);
4449 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4450 x = gen_rtx_PLUS (SImode, xop0, xop1);
4453 /* XXX We don't allow MINUS any more -- see comment in
4454 arm_legitimate_address_p (). */
4455 else if (GET_CODE (x) == MINUS)
4457 rtx xop0 = XEXP (x, 0);
4458 rtx xop1 = XEXP (x, 1);
4460 if (CONSTANT_P (xop0))
4461 xop0 = force_reg (SImode, xop0);
4463 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4464 xop1 = force_reg (SImode, xop1);
4466 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4467 x = gen_rtx_MINUS (SImode, xop0, xop1);
4470 /* Make sure to take full advantage of the pre-indexed addressing mode
4471 with absolute addresses which often allows for the base register to
4472 be factorized for multiple adjacent memory references, and it might
4473 even allows for the mini pool to be avoided entirely. */
4474 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4477 HOST_WIDE_INT mask, base, index;
4480 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4481 use a 8-bit index. So let's use a 12-bit index for SImode only and
4482 hope that arm_gen_constant will enable ldrb to use more bits. */
4483 bits = (mode == SImode) ? 12 : 8;
4484 mask = (1 << bits) - 1;
4485 base = INTVAL (x) & ~mask;
4486 index = INTVAL (x) & mask;
4487 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4489 /* It'll most probably be more efficient to generate the base
4490 with more bits set and use a negative index instead. */
4494 base_reg = force_reg (SImode, GEN_INT (base));
4495 x = plus_constant (base_reg, index);
4500 /* We need to find and carefully transform any SYMBOL and LABEL
4501 references; so go back to the original address expression. */
4502 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4504 if (new_x != orig_x)
4512 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4513 to be legitimate. If we find one, return the new, valid address. */
4515 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4517 if (arm_tls_symbol_p (x))
4518 return legitimize_tls_address (x, NULL_RTX);
4520 if (GET_CODE (x) == PLUS
4521 && GET_CODE (XEXP (x, 1)) == CONST_INT
4522 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4523 || INTVAL (XEXP (x, 1)) < 0))
4525 rtx xop0 = XEXP (x, 0);
4526 rtx xop1 = XEXP (x, 1);
4527 HOST_WIDE_INT offset = INTVAL (xop1);
4529 /* Try and fold the offset into a biasing of the base register and
4530 then offsetting that. Don't do this when optimizing for space
4531 since it can cause too many CSEs. */
4532 if (optimize_size && offset >= 0
4533 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4535 HOST_WIDE_INT delta;
4538 delta = offset - (256 - GET_MODE_SIZE (mode));
4539 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4540 delta = 31 * GET_MODE_SIZE (mode);
4542 delta = offset & (~31 * GET_MODE_SIZE (mode));
4544 xop0 = force_operand (plus_constant (xop0, offset - delta),
4546 x = plus_constant (xop0, delta);
4548 else if (offset < 0 && offset > -256)
4549 /* Small negative offsets are best done with a subtract before the
4550 dereference, forcing these into a register normally takes two
4552 x = force_operand (x, NULL_RTX);
4555 /* For the remaining cases, force the constant into a register. */
4556 xop1 = force_reg (SImode, xop1);
4557 x = gen_rtx_PLUS (SImode, xop0, xop1);
4560 else if (GET_CODE (x) == PLUS
4561 && s_register_operand (XEXP (x, 1), SImode)
4562 && !s_register_operand (XEXP (x, 0), SImode))
4564 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4566 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4571 /* We need to find and carefully transform any SYMBOL and LABEL
4572 references; so go back to the original address expression. */
4573 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4575 if (new_x != orig_x)
4583 thumb_legitimize_reload_address (rtx *x_p,
4584 enum machine_mode mode,
4585 int opnum, int type,
4586 int ind_levels ATTRIBUTE_UNUSED)
4590 if (GET_CODE (x) == PLUS
4591 && GET_MODE_SIZE (mode) < 4
4592 && REG_P (XEXP (x, 0))
4593 && XEXP (x, 0) == stack_pointer_rtx
4594 && GET_CODE (XEXP (x, 1)) == CONST_INT
4595 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4600 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4601 Pmode, VOIDmode, 0, 0, opnum, type);
4605 /* If both registers are hi-regs, then it's better to reload the
4606 entire expression rather than each register individually. That
4607 only requires one reload register rather than two. */
4608 if (GET_CODE (x) == PLUS
4609 && REG_P (XEXP (x, 0))
4610 && REG_P (XEXP (x, 1))
4611 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4612 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4617 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4618 Pmode, VOIDmode, 0, 0, opnum, type);
4625 /* Test for various thread-local symbols. */
4627 /* Return TRUE if X is a thread-local symbol. */
4630 arm_tls_symbol_p (rtx x)
4632 if (! TARGET_HAVE_TLS)
4635 if (GET_CODE (x) != SYMBOL_REF)
4638 return SYMBOL_REF_TLS_MODEL (x) != 0;
4641 /* Helper for arm_tls_referenced_p. */
4644 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4646 if (GET_CODE (*x) == SYMBOL_REF)
4647 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4649 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4650 TLS offsets, not real symbol references. */
4651 if (GET_CODE (*x) == UNSPEC
4652 && XINT (*x, 1) == UNSPEC_TLS)
4658 /* Return TRUE if X contains any TLS symbol references. */
4661 arm_tls_referenced_p (rtx x)
4663 if (! TARGET_HAVE_TLS)
4666 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4669 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4672 arm_cannot_force_const_mem (rtx x)
4676 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4678 split_const (x, &base, &offset);
4679 if (GET_CODE (base) == SYMBOL_REF
4680 && !offset_within_block_p (base, INTVAL (offset)))
4683 return arm_tls_referenced_p (x);
4686 #define REG_OR_SUBREG_REG(X) \
4687 (GET_CODE (X) == REG \
4688 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4690 #define REG_OR_SUBREG_RTX(X) \
4691 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4693 #ifndef COSTS_N_INSNS
4694 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4697 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4699 enum machine_mode mode = GET_MODE (x);
4712 return COSTS_N_INSNS (1);
4715 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4718 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4725 return COSTS_N_INSNS (2) + cycles;
4727 return COSTS_N_INSNS (1) + 16;
4730 return (COSTS_N_INSNS (1)
4731 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4732 + GET_CODE (SET_DEST (x)) == MEM));
4737 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4739 if (thumb_shiftable_const (INTVAL (x)))
4740 return COSTS_N_INSNS (2);
4741 return COSTS_N_INSNS (3);
4743 else if ((outer == PLUS || outer == COMPARE)
4744 && INTVAL (x) < 256 && INTVAL (x) > -256)
4746 else if (outer == AND
4747 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4748 return COSTS_N_INSNS (1);
4749 else if (outer == ASHIFT || outer == ASHIFTRT
4750 || outer == LSHIFTRT)
4752 return COSTS_N_INSNS (2);
4758 return COSTS_N_INSNS (3);
4776 /* XXX another guess. */
4777 /* Memory costs quite a lot for the first word, but subsequent words
4778 load at the equivalent of a single insn each. */
4779 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4780 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4785 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4790 /* XXX still guessing. */
4791 switch (GET_MODE (XEXP (x, 0)))
4794 return (1 + (mode == DImode ? 4 : 0)
4795 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4798 return (4 + (mode == DImode ? 4 : 0)
4799 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4802 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4814 /* Worker routine for arm_rtx_costs. */
4815 /* ??? This needs updating for thumb2. */
4817 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4819 enum machine_mode mode = GET_MODE (x);
4820 enum rtx_code subcode;
4826 /* Memory costs quite a lot for the first word, but subsequent words
4827 load at the equivalent of a single insn each. */
4828 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4829 + (GET_CODE (x) == SYMBOL_REF
4830 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4836 return optimize_size ? COSTS_N_INSNS (2) : 100;
4839 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4846 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4848 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4849 + ((GET_CODE (XEXP (x, 0)) == REG
4850 || (GET_CODE (XEXP (x, 0)) == SUBREG
4851 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4853 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4854 || (GET_CODE (XEXP (x, 0)) == SUBREG
4855 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4857 + ((GET_CODE (XEXP (x, 1)) == REG
4858 || (GET_CODE (XEXP (x, 1)) == SUBREG
4859 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4860 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4864 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4866 extra_cost = rtx_cost (XEXP (x, 1), code);
4867 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4868 extra_cost += 4 * ARM_NUM_REGS (mode);
4873 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4874 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4875 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4876 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4879 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4880 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4881 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4882 && arm_const_double_rtx (XEXP (x, 1))))
4884 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4885 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4886 && arm_const_double_rtx (XEXP (x, 0))))
4889 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4890 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4891 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4892 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4893 || subcode == ASHIFTRT || subcode == LSHIFTRT
4894 || subcode == ROTATE || subcode == ROTATERT
4896 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4897 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4898 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4899 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4900 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4901 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4902 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4907 if (GET_CODE (XEXP (x, 0)) == MULT)
4909 extra_cost = rtx_cost (XEXP (x, 0), code);
4910 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4911 extra_cost += 4 * ARM_NUM_REGS (mode);
4915 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4916 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4917 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4918 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4919 && arm_const_double_rtx (XEXP (x, 1))))
4923 case AND: case XOR: case IOR:
4926 /* Normally the frame registers will be spilt into reg+const during
4927 reload, so it is a bad idea to combine them with other instructions,
4928 since then they might not be moved outside of loops. As a compromise
4929 we allow integration with ops that have a constant as their second
4931 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4932 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4933 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4934 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4935 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4939 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4940 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4941 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4942 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4945 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4946 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4947 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4948 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4949 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4952 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4953 return (1 + extra_cost
4954 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4955 || subcode == LSHIFTRT || subcode == ASHIFTRT
4956 || subcode == ROTATE || subcode == ROTATERT
4958 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4959 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4960 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4961 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4962 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4963 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4969 /* This should have been handled by the CPU specific routines. */
4973 if (arm_arch3m && mode == SImode
4974 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4975 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4976 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4977 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4978 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4979 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4984 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4985 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4989 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4991 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4994 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5002 return 4 + (mode == DImode ? 4 : 0);
5005 /* ??? value extensions are cheaper on armv6. */
5006 if (GET_MODE (XEXP (x, 0)) == QImode)
5007 return (4 + (mode == DImode ? 4 : 0)
5008 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5011 switch (GET_MODE (XEXP (x, 0)))
5014 return (1 + (mode == DImode ? 4 : 0)
5015 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5018 return (4 + (mode == DImode ? 4 : 0)
5019 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5022 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5037 if (const_ok_for_arm (INTVAL (x)))
5038 return outer == SET ? 2 : -1;
5039 else if (outer == AND
5040 && const_ok_for_arm (~INTVAL (x)))
5042 else if ((outer == COMPARE
5043 || outer == PLUS || outer == MINUS)
5044 && const_ok_for_arm (-INTVAL (x)))
5055 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5056 return outer == SET ? 2 : -1;
5057 else if ((outer == COMPARE || outer == PLUS)
5058 && neg_const_double_rtx_ok_for_fpa (x))
5067 /* RTX costs when optimizing for size. */
5069 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5071 enum machine_mode mode = GET_MODE (x);
5075 /* XXX TBD. For now, use the standard costs. */
5076 *total = thumb1_rtx_costs (x, code, outer_code);
5083 /* A memory access costs 1 insn if the mode is small, or the address is
5084 a single register, otherwise it costs one insn per word. */
5085 if (REG_P (XEXP (x, 0)))
5086 *total = COSTS_N_INSNS (1);
5088 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5095 /* Needs a libcall, so it costs about this. */
5096 *total = COSTS_N_INSNS (2);
5100 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5102 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5110 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5112 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5115 else if (mode == SImode)
5117 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5118 /* Slightly disparage register shifts, but not by much. */
5119 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5120 *total += 1 + rtx_cost (XEXP (x, 1), code);
5124 /* Needs a libcall. */
5125 *total = COSTS_N_INSNS (2);
5129 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5131 *total = COSTS_N_INSNS (1);
5137 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5138 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5140 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5141 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5142 || subcode1 == ROTATE || subcode1 == ROTATERT
5143 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5144 || subcode1 == ASHIFTRT)
5146 /* It's just the cost of the two operands. */
5151 *total = COSTS_N_INSNS (1);
5155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5159 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5161 *total = COSTS_N_INSNS (1);
5166 case AND: case XOR: case IOR:
5169 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5171 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5172 || subcode == LSHIFTRT || subcode == ASHIFTRT
5173 || (code == AND && subcode == NOT))
5175 /* It's just the cost of the two operands. */
5181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5185 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5189 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5190 *total = COSTS_N_INSNS (1);
5193 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5202 if (cc_register (XEXP (x, 0), VOIDmode))
5205 *total = COSTS_N_INSNS (1);
5209 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5210 *total = COSTS_N_INSNS (1);
5212 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5217 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5219 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5220 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5223 *total += COSTS_N_INSNS (1);
5228 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5230 switch (GET_MODE (XEXP (x, 0)))
5233 *total += COSTS_N_INSNS (1);
5237 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5243 *total += COSTS_N_INSNS (2);
5248 *total += COSTS_N_INSNS (1);
5253 if (const_ok_for_arm (INTVAL (x)))
5254 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5255 else if (const_ok_for_arm (~INTVAL (x)))
5256 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5257 else if (const_ok_for_arm (-INTVAL (x)))
5259 if (outer_code == COMPARE || outer_code == PLUS
5260 || outer_code == MINUS)
5263 *total = COSTS_N_INSNS (1);
5266 *total = COSTS_N_INSNS (2);
5272 *total = COSTS_N_INSNS (2);
5276 *total = COSTS_N_INSNS (4);
5280 if (mode != VOIDmode)
5281 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5283 *total = COSTS_N_INSNS (4); /* How knows? */
5288 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5289 supported on any "slowmul" cores, so it can be ignored. */
5292 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5294 enum machine_mode mode = GET_MODE (x);
5298 *total = thumb1_rtx_costs (x, code, outer_code);
5305 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5312 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5314 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5315 & (unsigned HOST_WIDE_INT) 0xffffffff);
5316 int cost, const_ok = const_ok_for_arm (i);
5317 int j, booth_unit_size;
5319 /* Tune as appropriate. */
5320 cost = const_ok ? 4 : 8;
5321 booth_unit_size = 2;
5322 for (j = 0; i && j < 32; j += booth_unit_size)
5324 i >>= booth_unit_size;
5332 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5333 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5337 *total = arm_rtx_costs_1 (x, code, outer_code);
5343 /* RTX cost for cores with a fast multiply unit (M variants). */
5346 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5348 enum machine_mode mode = GET_MODE (x);
5352 *total = thumb1_rtx_costs (x, code, outer_code);
5356 /* ??? should thumb2 use different costs? */
5360 /* There is no point basing this on the tuning, since it is always the
5361 fast variant if it exists at all. */
5363 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5364 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5365 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5372 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5379 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5381 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5382 & (unsigned HOST_WIDE_INT) 0xffffffff);
5383 int cost, const_ok = const_ok_for_arm (i);
5384 int j, booth_unit_size;
5386 /* Tune as appropriate. */
5387 cost = const_ok ? 4 : 8;
5388 booth_unit_size = 8;
5389 for (j = 0; i && j < 32; j += booth_unit_size)
5391 i >>= booth_unit_size;
5399 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5400 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5404 *total = arm_rtx_costs_1 (x, code, outer_code);
5410 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5411 so it can be ignored. */
5414 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5416 enum machine_mode mode = GET_MODE (x);
5420 *total = thumb1_rtx_costs (x, code, outer_code);
5427 /* There is no point basing this on the tuning, since it is always the
5428 fast variant if it exists at all. */
5430 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5431 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5432 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5439 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5446 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5448 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5449 & (unsigned HOST_WIDE_INT) 0xffffffff);
5450 int cost, const_ok = const_ok_for_arm (i);
5451 unsigned HOST_WIDE_INT masked_const;
5453 /* The cost will be related to two insns.
5454 First a load of the constant (MOV or LDR), then a multiply. */
5457 cost += 1; /* LDR is probably more expensive because
5458 of longer result latency. */
5459 masked_const = i & 0xffff8000;
5460 if (masked_const != 0 && masked_const != 0xffff8000)
5462 masked_const = i & 0xf8000000;
5463 if (masked_const == 0 || masked_const == 0xf8000000)
5472 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5473 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5477 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5478 will stall until the multiplication is complete. */
5479 if (GET_CODE (XEXP (x, 0)) == MULT)
5480 *total = 4 + rtx_cost (XEXP (x, 0), code);
5482 *total = arm_rtx_costs_1 (x, code, outer_code);
5486 *total = arm_rtx_costs_1 (x, code, outer_code);
5492 /* RTX costs for 9e (and later) cores. */
5495 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5497 enum machine_mode mode = GET_MODE (x);
5506 *total = COSTS_N_INSNS (3);
5510 *total = thumb1_rtx_costs (x, code, outer_code);
5518 /* There is no point basing this on the tuning, since it is always the
5519 fast variant if it exists at all. */
5521 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5522 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5523 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5530 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5547 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5548 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5552 *total = arm_rtx_costs_1 (x, code, outer_code);
5556 /* All address computations that can be done are free, but rtx cost returns
5557 the same for practically all of them. So we weight the different types
5558 of address here in the order (most pref first):
5559 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5561 arm_arm_address_cost (rtx x)
5563 enum rtx_code c = GET_CODE (x);
5565 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5567 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5570 if (c == PLUS || c == MINUS)
5572 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5575 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5585 arm_thumb_address_cost (rtx x)
5587 enum rtx_code c = GET_CODE (x);
5592 && GET_CODE (XEXP (x, 0)) == REG
5593 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5600 arm_address_cost (rtx x)
5602 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5606 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5610 /* Some true dependencies can have a higher cost depending
5611 on precisely how certain input operands are used. */
5613 && REG_NOTE_KIND (link) == 0
5614 && recog_memoized (insn) >= 0
5615 && recog_memoized (dep) >= 0)
5617 int shift_opnum = get_attr_shift (insn);
5618 enum attr_type attr_type = get_attr_type (dep);
5620 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5621 operand for INSN. If we have a shifted input operand and the
5622 instruction we depend on is another ALU instruction, then we may
5623 have to account for an additional stall. */
5624 if (shift_opnum != 0
5625 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5627 rtx shifted_operand;
5630 /* Get the shifted operand. */
5631 extract_insn (insn);
5632 shifted_operand = recog_data.operand[shift_opnum];
5634 /* Iterate over all the operands in DEP. If we write an operand
5635 that overlaps with SHIFTED_OPERAND, then we have increase the
5636 cost of this dependency. */
5638 preprocess_constraints ();
5639 for (opno = 0; opno < recog_data.n_operands; opno++)
5641 /* We can ignore strict inputs. */
5642 if (recog_data.operand_type[opno] == OP_IN)
5645 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5652 /* XXX This is not strictly true for the FPA. */
5653 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5654 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5657 /* Call insns don't incur a stall, even if they follow a load. */
5658 if (REG_NOTE_KIND (link) == 0
5659 && GET_CODE (insn) == CALL_INSN)
5662 if ((i_pat = single_set (insn)) != NULL
5663 && GET_CODE (SET_SRC (i_pat)) == MEM
5664 && (d_pat = single_set (dep)) != NULL
5665 && GET_CODE (SET_DEST (d_pat)) == MEM)
5667 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5668 /* This is a load after a store, there is no conflict if the load reads
5669 from a cached area. Assume that loads from the stack, and from the
5670 constant pool are cached, and that others will miss. This is a
5673 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5674 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5675 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5676 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5683 static int fp_consts_inited = 0;
5685 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5686 static const char * const strings_fp[8] =
5689 "4", "5", "0.5", "10"
5692 static REAL_VALUE_TYPE values_fp[8];
5695 init_fp_table (void)
5701 fp_consts_inited = 1;
5703 fp_consts_inited = 8;
5705 for (i = 0; i < fp_consts_inited; i++)
5707 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5712 /* Return TRUE if rtx X is a valid immediate FP constant. */
5714 arm_const_double_rtx (rtx x)
5719 if (!fp_consts_inited)
5722 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5723 if (REAL_VALUE_MINUS_ZERO (r))
5726 for (i = 0; i < fp_consts_inited; i++)
5727 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5733 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5735 neg_const_double_rtx_ok_for_fpa (rtx x)
5740 if (!fp_consts_inited)
5743 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5744 r = REAL_VALUE_NEGATE (r);
5745 if (REAL_VALUE_MINUS_ZERO (r))
5748 for (i = 0; i < 8; i++)
5749 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5756 /* VFPv3 has a fairly wide range of representable immediates, formed from
5757 "quarter-precision" floating-point values. These can be evaluated using this
5758 formula (with ^ for exponentiation):
5762 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5763 16 <= n <= 31 and 0 <= r <= 7.
5765 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5767 - A (most-significant) is the sign bit.
5768 - BCD are the exponent (encoded as r XOR 3).
5769 - EFGH are the mantissa (encoded as n - 16).
5772 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5773 fconst[sd] instruction, or -1 if X isn't suitable. */
5775 vfp3_const_double_index (rtx x)
5777 REAL_VALUE_TYPE r, m;
5779 unsigned HOST_WIDE_INT mantissa, mant_hi;
5780 unsigned HOST_WIDE_INT mask;
5781 HOST_WIDE_INT m1, m2;
5782 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5784 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5787 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5789 /* We can't represent these things, so detect them first. */
5790 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5793 /* Extract sign, exponent and mantissa. */
5794 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5795 r = REAL_VALUE_ABS (r);
5796 exponent = REAL_EXP (&r);
5797 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5798 highest (sign) bit, with a fixed binary point at bit point_pos.
5799 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5800 bits for the mantissa, this may fail (low bits would be lost). */
5801 real_ldexp (&m, &r, point_pos - exponent);
5802 REAL_VALUE_TO_INT (&m1, &m2, m);
5806 /* If there are bits set in the low part of the mantissa, we can't
5807 represent this value. */
5811 /* Now make it so that mantissa contains the most-significant bits, and move
5812 the point_pos to indicate that the least-significant bits have been
5814 point_pos -= HOST_BITS_PER_WIDE_INT;
5817 /* We can permit four significant bits of mantissa only, plus a high bit
5818 which is always 1. */
5819 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5820 if ((mantissa & mask) != 0)
5823 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5824 mantissa >>= point_pos - 5;
5826 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5827 floating-point immediate zero with Neon using an integer-zero load, but
5828 that case is handled elsewhere.) */
5832 gcc_assert (mantissa >= 16 && mantissa <= 31);
5834 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5835 normalized significands are in the range [1, 2). (Our mantissa is shifted
5836 left 4 places at this point relative to normalized IEEE754 values). GCC
5837 internally uses [0.5, 1) (see real.c), so the exponent returned from
5838 REAL_EXP must be altered. */
5839 exponent = 5 - exponent;
5841 if (exponent < 0 || exponent > 7)
5844 /* Sign, mantissa and exponent are now in the correct form to plug into the
5845 formula described in the comment above. */
5846 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5849 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5851 vfp3_const_double_rtx (rtx x)
5856 return vfp3_const_double_index (x) != -1;
5859 /* Recognize immediates which can be used in various Neon instructions. Legal
5860 immediates are described by the following table (for VMVN variants, the
5861 bitwise inverse of the constant shown is recognized. In either case, VMOV
5862 is output and the correct instruction to use for a given constant is chosen
5863 by the assembler). The constant shown is replicated across all elements of
5864 the destination vector.
5866 insn elems variant constant (binary)
5867 ---- ----- ------- -----------------
5868 vmov i32 0 00000000 00000000 00000000 abcdefgh
5869 vmov i32 1 00000000 00000000 abcdefgh 00000000
5870 vmov i32 2 00000000 abcdefgh 00000000 00000000
5871 vmov i32 3 abcdefgh 00000000 00000000 00000000
5872 vmov i16 4 00000000 abcdefgh
5873 vmov i16 5 abcdefgh 00000000
5874 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5875 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5876 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5877 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5878 vmvn i16 10 00000000 abcdefgh
5879 vmvn i16 11 abcdefgh 00000000
5880 vmov i32 12 00000000 00000000 abcdefgh 11111111
5881 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5882 vmov i32 14 00000000 abcdefgh 11111111 11111111
5883 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5885 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5886 eeeeeeee ffffffff gggggggg hhhhhhhh
5887 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5889 For case 18, B = !b. Representable values are exactly those accepted by
5890 vfp3_const_double_index, but are output as floating-point numbers rather
5893 Variants 0-5 (inclusive) may also be used as immediates for the second
5894 operand of VORR/VBIC instructions.
5896 The INVERSE argument causes the bitwise inverse of the given operand to be
5897 recognized instead (used for recognizing legal immediates for the VAND/VORN
5898 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5899 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5900 output, rather than the real insns vbic/vorr).
5902 INVERSE makes no difference to the recognition of float vectors.
5904 The return value is the variant of immediate as shown in the above table, or
5905 -1 if the given value doesn't match any of the listed patterns.
5908 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5909 rtx *modconst, int *elementwidth)
5911 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5913 for (i = 0; i < idx; i += (STRIDE)) \
5918 immtype = (CLASS); \
5919 elsize = (ELSIZE); \
5923 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5924 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5925 unsigned char bytes[16];
5926 int immtype = -1, matches;
5927 unsigned int invmask = inverse ? 0xff : 0;
5929 /* Vectors of float constants. */
5930 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5932 rtx el0 = CONST_VECTOR_ELT (op, 0);
5935 if (!vfp3_const_double_rtx (el0))
5938 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5940 for (i = 1; i < n_elts; i++)
5942 rtx elt = CONST_VECTOR_ELT (op, i);
5945 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5947 if (!REAL_VALUES_EQUAL (r0, re))
5952 *modconst = CONST_VECTOR_ELT (op, 0);
5960 /* Splat vector constant out into a byte vector. */
5961 for (i = 0; i < n_elts; i++)
5963 rtx el = CONST_VECTOR_ELT (op, i);
5964 unsigned HOST_WIDE_INT elpart;
5965 unsigned int part, parts;
5967 if (GET_CODE (el) == CONST_INT)
5969 elpart = INTVAL (el);
5972 else if (GET_CODE (el) == CONST_DOUBLE)
5974 elpart = CONST_DOUBLE_LOW (el);
5980 for (part = 0; part < parts; part++)
5983 for (byte = 0; byte < innersize; byte++)
5985 bytes[idx++] = (elpart & 0xff) ^ invmask;
5986 elpart >>= BITS_PER_UNIT;
5988 if (GET_CODE (el) == CONST_DOUBLE)
5989 elpart = CONST_DOUBLE_HIGH (el);
5994 gcc_assert (idx == GET_MODE_SIZE (mode));
5998 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
5999 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6001 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6002 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6004 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6005 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6007 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6008 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6010 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6012 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6014 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6015 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6017 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6018 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6020 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6021 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6023 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6024 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6026 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6028 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6030 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6031 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6033 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6034 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6036 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6037 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6039 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6040 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6042 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6044 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6045 && bytes[i] == bytes[(i + 8) % idx]);
6053 *elementwidth = elsize;
6057 unsigned HOST_WIDE_INT imm = 0;
6059 /* Un-invert bytes of recognized vector, if necessary. */
6061 for (i = 0; i < idx; i++)
6062 bytes[i] ^= invmask;
6066 /* FIXME: Broken on 32-bit H_W_I hosts. */
6067 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6069 for (i = 0; i < 8; i++)
6070 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6071 << (i * BITS_PER_UNIT);
6073 *modconst = GEN_INT (imm);
6077 unsigned HOST_WIDE_INT imm = 0;
6079 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6080 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6082 *modconst = GEN_INT (imm);
6090 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6091 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6092 float elements), and a modified constant (whatever should be output for a
6093 VMOV) in *MODCONST. */
6096 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6097 rtx *modconst, int *elementwidth)
6101 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6107 *modconst = tmpconst;
6110 *elementwidth = tmpwidth;
6115 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6116 the immediate is valid, write a constant suitable for using as an operand
6117 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6118 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6121 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6122 rtx *modconst, int *elementwidth)
6126 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6128 if (retval < 0 || retval > 5)
6132 *modconst = tmpconst;
6135 *elementwidth = tmpwidth;
6140 /* Return a string suitable for output of Neon immediate logic operation
6144 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6145 int inverse, int quad)
6147 int width, is_valid;
6148 static char templ[40];
6150 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6152 gcc_assert (is_valid != 0);
6155 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6157 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6162 /* Output a sequence of pairwise operations to implement a reduction.
6163 NOTE: We do "too much work" here, because pairwise operations work on two
6164 registers-worth of operands in one go. Unfortunately we can't exploit those
6165 extra calculations to do the full operation in fewer steps, I don't think.
6166 Although all vector elements of the result but the first are ignored, we
6167 actually calculate the same result in each of the elements. An alternative
6168 such as initially loading a vector with zero to use as each of the second
6169 operands would use up an additional register and take an extra instruction,
6170 for no particular gain. */
6173 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6174 rtx (*reduc) (rtx, rtx, rtx))
6176 enum machine_mode inner = GET_MODE_INNER (mode);
6177 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6180 for (i = parts / 2; i >= 1; i /= 2)
6182 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6183 emit_insn (reduc (dest, tmpsum, tmpsum));
6188 /* Initialize a vector with non-constant elements. FIXME: We can do better
6189 than the current implementation (building a vector on the stack and then
6190 loading it) in many cases. See rs6000.c. */
6193 neon_expand_vector_init (rtx target, rtx vals)
6195 enum machine_mode mode = GET_MODE (target);
6196 enum machine_mode inner = GET_MODE_INNER (mode);
6197 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6200 gcc_assert (VECTOR_MODE_P (mode));
6202 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6203 for (i = 0; i < n_elts; i++)
6204 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6205 XVECEXP (vals, 0, i));
6207 emit_move_insn (target, mem);
6210 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6211 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6212 reported source locations are bogus. */
6215 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6220 gcc_assert (GET_CODE (operand) == CONST_INT);
6222 lane = INTVAL (operand);
6224 if (lane < low || lane >= high)
6228 /* Bounds-check lanes. */
6231 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6233 bounds_check (operand, low, high, "lane out of range");
6236 /* Bounds-check constants. */
6239 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6241 bounds_check (operand, low, high, "constant out of range");
6245 neon_element_bits (enum machine_mode mode)
6248 return GET_MODE_BITSIZE (mode);
6250 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6254 /* Predicates for `match_operand' and `match_operator'. */
6256 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6258 cirrus_memory_offset (rtx op)
6260 /* Reject eliminable registers. */
6261 if (! (reload_in_progress || reload_completed)
6262 && ( reg_mentioned_p (frame_pointer_rtx, op)
6263 || reg_mentioned_p (arg_pointer_rtx, op)
6264 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6265 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6266 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6267 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6270 if (GET_CODE (op) == MEM)
6276 /* Match: (mem (reg)). */
6277 if (GET_CODE (ind) == REG)
6283 if (GET_CODE (ind) == PLUS
6284 && GET_CODE (XEXP (ind, 0)) == REG
6285 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6286 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6293 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6294 WB is true if full writeback address modes are allowed and is false
6295 if limited writeback address modes (POST_INC and PRE_DEC) are
6299 arm_coproc_mem_operand (rtx op, bool wb)
6303 /* Reject eliminable registers. */
6304 if (! (reload_in_progress || reload_completed)
6305 && ( reg_mentioned_p (frame_pointer_rtx, op)
6306 || reg_mentioned_p (arg_pointer_rtx, op)
6307 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6308 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6309 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6310 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6313 /* Constants are converted into offsets from labels. */
6314 if (GET_CODE (op) != MEM)
6319 if (reload_completed
6320 && (GET_CODE (ind) == LABEL_REF
6321 || (GET_CODE (ind) == CONST
6322 && GET_CODE (XEXP (ind, 0)) == PLUS
6323 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6324 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6327 /* Match: (mem (reg)). */
6328 if (GET_CODE (ind) == REG)
6329 return arm_address_register_rtx_p (ind, 0);
6331 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
6332 acceptable in any case (subject to verification by
6333 arm_address_register_rtx_p). We need WB to be true to accept
6334 PRE_INC and POST_DEC. */
6335 if (GET_CODE (ind) == POST_INC
6336 || GET_CODE (ind) == PRE_DEC
6338 && (GET_CODE (ind) == PRE_INC
6339 || GET_CODE (ind) == POST_DEC)))
6340 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6343 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6344 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6345 && GET_CODE (XEXP (ind, 1)) == PLUS
6346 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6347 ind = XEXP (ind, 1);
6352 if (GET_CODE (ind) == PLUS
6353 && GET_CODE (XEXP (ind, 0)) == REG
6354 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6355 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6356 && INTVAL (XEXP (ind, 1)) > -1024
6357 && INTVAL (XEXP (ind, 1)) < 1024
6358 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6364 /* Return TRUE if OP is a memory operand which we can load or store a vector
6365 to/from. If CORE is true, we're moving from ARM registers not Neon
6368 neon_vector_mem_operand (rtx op, bool core)
6372 /* Reject eliminable registers. */
6373 if (! (reload_in_progress || reload_completed)
6374 && ( reg_mentioned_p (frame_pointer_rtx, op)
6375 || reg_mentioned_p (arg_pointer_rtx, op)
6376 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6377 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6378 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6379 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6382 /* Constants are converted into offsets from labels. */
6383 if (GET_CODE (op) != MEM)
6388 if (reload_completed
6389 && (GET_CODE (ind) == LABEL_REF
6390 || (GET_CODE (ind) == CONST
6391 && GET_CODE (XEXP (ind, 0)) == PLUS
6392 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6393 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6396 /* Match: (mem (reg)). */
6397 if (GET_CODE (ind) == REG)
6398 return arm_address_register_rtx_p (ind, 0);
6400 /* Allow post-increment with Neon registers. */
6401 if (!core && GET_CODE (ind) == POST_INC)
6402 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6405 /* FIXME: We can support this too if we use VLD1/VST1. */
6407 && GET_CODE (ind) == POST_MODIFY
6408 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6409 && GET_CODE (XEXP (ind, 1)) == PLUS
6410 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6411 ind = XEXP (ind, 1);
6418 && GET_CODE (ind) == PLUS
6419 && GET_CODE (XEXP (ind, 0)) == REG
6420 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6421 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6422 && INTVAL (XEXP (ind, 1)) > -1024
6423 && INTVAL (XEXP (ind, 1)) < 1016
6424 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6430 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6433 neon_struct_mem_operand (rtx op)
6437 /* Reject eliminable registers. */
6438 if (! (reload_in_progress || reload_completed)
6439 && ( reg_mentioned_p (frame_pointer_rtx, op)
6440 || reg_mentioned_p (arg_pointer_rtx, op)
6441 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6442 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6443 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6444 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6447 /* Constants are converted into offsets from labels. */
6448 if (GET_CODE (op) != MEM)
6453 if (reload_completed
6454 && (GET_CODE (ind) == LABEL_REF
6455 || (GET_CODE (ind) == CONST
6456 && GET_CODE (XEXP (ind, 0)) == PLUS
6457 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6458 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6461 /* Match: (mem (reg)). */
6462 if (GET_CODE (ind) == REG)
6463 return arm_address_register_rtx_p (ind, 0);
6468 /* Return true if X is a register that will be eliminated later on. */
6470 arm_eliminable_register (rtx x)
6472 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6473 || REGNO (x) == ARG_POINTER_REGNUM
6474 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6475 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6478 /* Return GENERAL_REGS if a scratch register required to reload x to/from
6479 coprocessor registers. Otherwise return NO_REGS. */
6482 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6485 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6486 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6487 && neon_vector_mem_operand (x, FALSE))
6490 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6493 return GENERAL_REGS;
6496 /* Values which must be returned in the most-significant end of the return
6500 arm_return_in_msb (const_tree valtype)
6502 return (TARGET_AAPCS_BASED
6504 && (AGGREGATE_TYPE_P (valtype)
6505 || TREE_CODE (valtype) == COMPLEX_TYPE));
6508 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6509 Use by the Cirrus Maverick code which has to workaround
6510 a hardware bug triggered by such instructions. */
6512 arm_memory_load_p (rtx insn)
6514 rtx body, lhs, rhs;;
6516 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6519 body = PATTERN (insn);
6521 if (GET_CODE (body) != SET)
6524 lhs = XEXP (body, 0);
6525 rhs = XEXP (body, 1);
6527 lhs = REG_OR_SUBREG_RTX (lhs);
6529 /* If the destination is not a general purpose
6530 register we do not have to worry. */
6531 if (GET_CODE (lhs) != REG
6532 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6535 /* As well as loads from memory we also have to react
6536 to loads of invalid constants which will be turned
6537 into loads from the minipool. */
6538 return (GET_CODE (rhs) == MEM
6539 || GET_CODE (rhs) == SYMBOL_REF
6540 || note_invalid_constants (insn, -1, false));
6543 /* Return TRUE if INSN is a Cirrus instruction. */
6545 arm_cirrus_insn_p (rtx insn)
6547 enum attr_cirrus attr;
6549 /* get_attr cannot accept USE or CLOBBER. */
6551 || GET_CODE (insn) != INSN
6552 || GET_CODE (PATTERN (insn)) == USE
6553 || GET_CODE (PATTERN (insn)) == CLOBBER)
6556 attr = get_attr_cirrus (insn);
6558 return attr != CIRRUS_NOT;
6561 /* Cirrus reorg for invalid instruction combinations. */
6563 cirrus_reorg (rtx first)
6565 enum attr_cirrus attr;
6566 rtx body = PATTERN (first);
6570 /* Any branch must be followed by 2 non Cirrus instructions. */
6571 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6574 t = next_nonnote_insn (first);
6576 if (arm_cirrus_insn_p (t))
6579 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6583 emit_insn_after (gen_nop (), first);
6588 /* (float (blah)) is in parallel with a clobber. */
6589 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6590 body = XVECEXP (body, 0, 0);
6592 if (GET_CODE (body) == SET)
6594 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6596 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6597 be followed by a non Cirrus insn. */
6598 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6600 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6601 emit_insn_after (gen_nop (), first);
6605 else if (arm_memory_load_p (first))
6607 unsigned int arm_regno;
6609 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6610 ldr/cfmv64hr combination where the Rd field is the same
6611 in both instructions must be split with a non Cirrus
6618 /* Get Arm register number for ldr insn. */
6619 if (GET_CODE (lhs) == REG)
6620 arm_regno = REGNO (lhs);
6623 gcc_assert (GET_CODE (rhs) == REG);
6624 arm_regno = REGNO (rhs);
6628 first = next_nonnote_insn (first);
6630 if (! arm_cirrus_insn_p (first))
6633 body = PATTERN (first);
6635 /* (float (blah)) is in parallel with a clobber. */
6636 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6637 body = XVECEXP (body, 0, 0);
6639 if (GET_CODE (body) == FLOAT)
6640 body = XEXP (body, 0);
6642 if (get_attr_cirrus (first) == CIRRUS_MOVE
6643 && GET_CODE (XEXP (body, 1)) == REG
6644 && arm_regno == REGNO (XEXP (body, 1)))
6645 emit_insn_after (gen_nop (), first);
6651 /* get_attr cannot accept USE or CLOBBER. */
6653 || GET_CODE (first) != INSN
6654 || GET_CODE (PATTERN (first)) == USE
6655 || GET_CODE (PATTERN (first)) == CLOBBER)
6658 attr = get_attr_cirrus (first);
6660 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6661 must be followed by a non-coprocessor instruction. */
6662 if (attr == CIRRUS_COMPARE)
6666 t = next_nonnote_insn (first);
6668 if (arm_cirrus_insn_p (t))
6671 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6675 emit_insn_after (gen_nop (), first);
6681 /* Return TRUE if X references a SYMBOL_REF. */
6683 symbol_mentioned_p (rtx x)
6688 if (GET_CODE (x) == SYMBOL_REF)
6691 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6692 are constant offsets, not symbols. */
6693 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6696 fmt = GET_RTX_FORMAT (GET_CODE (x));
6698 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6704 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6705 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6708 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6715 /* Return TRUE if X references a LABEL_REF. */
6717 label_mentioned_p (rtx x)
6722 if (GET_CODE (x) == LABEL_REF)
6725 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6726 instruction, but they are constant offsets, not symbols. */
6727 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6730 fmt = GET_RTX_FORMAT (GET_CODE (x));
6731 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6737 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6738 if (label_mentioned_p (XVECEXP (x, i, j)))
6741 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6749 tls_mentioned_p (rtx x)
6751 switch (GET_CODE (x))
6754 return tls_mentioned_p (XEXP (x, 0));
6757 if (XINT (x, 1) == UNSPEC_TLS)
6765 /* Must not copy a SET whose source operand is PC-relative. */
6768 arm_cannot_copy_insn_p (rtx insn)
6770 rtx pat = PATTERN (insn);
6772 if (GET_CODE (pat) == SET)
6774 rtx rhs = SET_SRC (pat);
6776 if (GET_CODE (rhs) == UNSPEC
6777 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6780 if (GET_CODE (rhs) == MEM
6781 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6782 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6792 enum rtx_code code = GET_CODE (x);
6809 /* Return 1 if memory locations are adjacent. */
6811 adjacent_mem_locations (rtx a, rtx b)
6813 /* We don't guarantee to preserve the order of these memory refs. */
6814 if (volatile_refs_p (a) || volatile_refs_p (b))
6817 if ((GET_CODE (XEXP (a, 0)) == REG
6818 || (GET_CODE (XEXP (a, 0)) == PLUS
6819 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6820 && (GET_CODE (XEXP (b, 0)) == REG
6821 || (GET_CODE (XEXP (b, 0)) == PLUS
6822 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6824 HOST_WIDE_INT val0 = 0, val1 = 0;
6828 if (GET_CODE (XEXP (a, 0)) == PLUS)
6830 reg0 = XEXP (XEXP (a, 0), 0);
6831 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6836 if (GET_CODE (XEXP (b, 0)) == PLUS)
6838 reg1 = XEXP (XEXP (b, 0), 0);
6839 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6844 /* Don't accept any offset that will require multiple
6845 instructions to handle, since this would cause the
6846 arith_adjacentmem pattern to output an overlong sequence. */
6847 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6850 /* Don't allow an eliminable register: register elimination can make
6851 the offset too large. */
6852 if (arm_eliminable_register (reg0))
6855 val_diff = val1 - val0;
6859 /* If the target has load delay slots, then there's no benefit
6860 to using an ldm instruction unless the offset is zero and
6861 we are optimizing for size. */
6862 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6863 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6864 && (val_diff == 4 || val_diff == -4));
6867 return ((REGNO (reg0) == REGNO (reg1))
6868 && (val_diff == 4 || val_diff == -4));
6875 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6876 HOST_WIDE_INT *load_offset)
6878 int unsorted_regs[4];
6879 HOST_WIDE_INT unsorted_offsets[4];
6884 /* Can only handle 2, 3, or 4 insns at present,
6885 though could be easily extended if required. */
6886 gcc_assert (nops >= 2 && nops <= 4);
6888 /* Loop over the operands and check that the memory references are
6889 suitable (i.e. immediate offsets from the same base register). At
6890 the same time, extract the target register, and the memory
6892 for (i = 0; i < nops; i++)
6897 /* Convert a subreg of a mem into the mem itself. */
6898 if (GET_CODE (operands[nops + i]) == SUBREG)
6899 operands[nops + i] = alter_subreg (operands + (nops + i));
6901 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6903 /* Don't reorder volatile memory references; it doesn't seem worth
6904 looking for the case where the order is ok anyway. */
6905 if (MEM_VOLATILE_P (operands[nops + i]))
6908 offset = const0_rtx;
6910 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6911 || (GET_CODE (reg) == SUBREG
6912 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6913 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6914 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6916 || (GET_CODE (reg) == SUBREG
6917 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6918 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6923 base_reg = REGNO (reg);
6924 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6925 ? REGNO (operands[i])
6926 : REGNO (SUBREG_REG (operands[i])));
6931 if (base_reg != (int) REGNO (reg))
6932 /* Not addressed from the same base register. */
6935 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6936 ? REGNO (operands[i])
6937 : REGNO (SUBREG_REG (operands[i])));
6938 if (unsorted_regs[i] < unsorted_regs[order[0]])
6942 /* If it isn't an integer register, or if it overwrites the
6943 base register but isn't the last insn in the list, then
6944 we can't do this. */
6945 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6946 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6949 unsorted_offsets[i] = INTVAL (offset);
6952 /* Not a suitable memory address. */
6956 /* All the useful information has now been extracted from the
6957 operands into unsorted_regs and unsorted_offsets; additionally,
6958 order[0] has been set to the lowest numbered register in the
6959 list. Sort the registers into order, and check that the memory
6960 offsets are ascending and adjacent. */
6962 for (i = 1; i < nops; i++)
6966 order[i] = order[i - 1];
6967 for (j = 0; j < nops; j++)
6968 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6969 && (order[i] == order[i - 1]
6970 || unsorted_regs[j] < unsorted_regs[order[i]]))
6973 /* Have we found a suitable register? if not, one must be used more
6975 if (order[i] == order[i - 1])
6978 /* Is the memory address adjacent and ascending? */
6979 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6987 for (i = 0; i < nops; i++)
6988 regs[i] = unsorted_regs[order[i]];
6990 *load_offset = unsorted_offsets[order[0]];
6993 if (unsorted_offsets[order[0]] == 0)
6994 return 1; /* ldmia */
6996 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
6997 return 2; /* ldmib */
6999 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7000 return 3; /* ldmda */
7002 if (unsorted_offsets[order[nops - 1]] == -4)
7003 return 4; /* ldmdb */
7005 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7006 if the offset isn't small enough. The reason 2 ldrs are faster
7007 is because these ARMs are able to do more than one cache access
7008 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7009 whilst the ARM8 has a double bandwidth cache. This means that
7010 these cores can do both an instruction fetch and a data fetch in
7011 a single cycle, so the trick of calculating the address into a
7012 scratch register (one of the result regs) and then doing a load
7013 multiple actually becomes slower (and no smaller in code size).
7014 That is the transformation
7016 ldr rd1, [rbase + offset]
7017 ldr rd2, [rbase + offset + 4]
7021 add rd1, rbase, offset
7022 ldmia rd1, {rd1, rd2}
7024 produces worse code -- '3 cycles + any stalls on rd2' instead of
7025 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7026 access per cycle, the first sequence could never complete in less
7027 than 6 cycles, whereas the ldm sequence would only take 5 and
7028 would make better use of sequential accesses if not hitting the
7031 We cheat here and test 'arm_ld_sched' which we currently know to
7032 only be true for the ARM8, ARM9 and StrongARM. If this ever
7033 changes, then the test below needs to be reworked. */
7034 if (nops == 2 && arm_ld_sched)
7037 /* Can't do it without setting up the offset, only do this if it takes
7038 no more than one insn. */
7039 return (const_ok_for_arm (unsorted_offsets[order[0]])
7040 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7044 emit_ldm_seq (rtx *operands, int nops)
7048 HOST_WIDE_INT offset;
7052 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7055 strcpy (buf, "ldm%(ia%)\t");
7059 strcpy (buf, "ldm%(ib%)\t");
7063 strcpy (buf, "ldm%(da%)\t");
7067 strcpy (buf, "ldm%(db%)\t");
7072 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7073 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7076 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7077 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7079 output_asm_insn (buf, operands);
7081 strcpy (buf, "ldm%(ia%)\t");
7088 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7089 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7091 for (i = 1; i < nops; i++)
7092 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7093 reg_names[regs[i]]);
7095 strcat (buf, "}\t%@ phole ldm");
7097 output_asm_insn (buf, operands);
7102 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7103 HOST_WIDE_INT * load_offset)
7105 int unsorted_regs[4];
7106 HOST_WIDE_INT unsorted_offsets[4];
7111 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7112 extended if required. */
7113 gcc_assert (nops >= 2 && nops <= 4);
7115 /* Loop over the operands and check that the memory references are
7116 suitable (i.e. immediate offsets from the same base register). At
7117 the same time, extract the target register, and the memory
7119 for (i = 0; i < nops; i++)
7124 /* Convert a subreg of a mem into the mem itself. */
7125 if (GET_CODE (operands[nops + i]) == SUBREG)
7126 operands[nops + i] = alter_subreg (operands + (nops + i));
7128 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7130 /* Don't reorder volatile memory references; it doesn't seem worth
7131 looking for the case where the order is ok anyway. */
7132 if (MEM_VOLATILE_P (operands[nops + i]))
7135 offset = const0_rtx;
7137 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7138 || (GET_CODE (reg) == SUBREG
7139 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7140 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7141 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7143 || (GET_CODE (reg) == SUBREG
7144 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7145 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7150 base_reg = REGNO (reg);
7151 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7152 ? REGNO (operands[i])
7153 : REGNO (SUBREG_REG (operands[i])));
7158 if (base_reg != (int) REGNO (reg))
7159 /* Not addressed from the same base register. */
7162 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7163 ? REGNO (operands[i])
7164 : REGNO (SUBREG_REG (operands[i])));
7165 if (unsorted_regs[i] < unsorted_regs[order[0]])
7169 /* If it isn't an integer register, then we can't do this. */
7170 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7173 unsorted_offsets[i] = INTVAL (offset);
7176 /* Not a suitable memory address. */
7180 /* All the useful information has now been extracted from the
7181 operands into unsorted_regs and unsorted_offsets; additionally,
7182 order[0] has been set to the lowest numbered register in the
7183 list. Sort the registers into order, and check that the memory
7184 offsets are ascending and adjacent. */
7186 for (i = 1; i < nops; i++)
7190 order[i] = order[i - 1];
7191 for (j = 0; j < nops; j++)
7192 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7193 && (order[i] == order[i - 1]
7194 || unsorted_regs[j] < unsorted_regs[order[i]]))
7197 /* Have we found a suitable register? if not, one must be used more
7199 if (order[i] == order[i - 1])
7202 /* Is the memory address adjacent and ascending? */
7203 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7211 for (i = 0; i < nops; i++)
7212 regs[i] = unsorted_regs[order[i]];
7214 *load_offset = unsorted_offsets[order[0]];
7217 if (unsorted_offsets[order[0]] == 0)
7218 return 1; /* stmia */
7220 if (unsorted_offsets[order[0]] == 4)
7221 return 2; /* stmib */
7223 if (unsorted_offsets[order[nops - 1]] == 0)
7224 return 3; /* stmda */
7226 if (unsorted_offsets[order[nops - 1]] == -4)
7227 return 4; /* stmdb */
7233 emit_stm_seq (rtx *operands, int nops)
7237 HOST_WIDE_INT offset;
7241 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7244 strcpy (buf, "stm%(ia%)\t");
7248 strcpy (buf, "stm%(ib%)\t");
7252 strcpy (buf, "stm%(da%)\t");
7256 strcpy (buf, "stm%(db%)\t");
7263 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7264 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7266 for (i = 1; i < nops; i++)
7267 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7268 reg_names[regs[i]]);
7270 strcat (buf, "}\t%@ phole stm");
7272 output_asm_insn (buf, operands);
7276 /* Routines for use in generating RTL. */
7279 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7280 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7282 HOST_WIDE_INT offset = *offsetp;
7285 int sign = up ? 1 : -1;
7288 /* XScale has load-store double instructions, but they have stricter
7289 alignment requirements than load-store multiple, so we cannot
7292 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7293 the pipeline until completion.
7301 An ldr instruction takes 1-3 cycles, but does not block the
7310 Best case ldr will always win. However, the more ldr instructions
7311 we issue, the less likely we are to be able to schedule them well.
7312 Using ldr instructions also increases code size.
7314 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7315 for counts of 3 or 4 regs. */
7316 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7322 for (i = 0; i < count; i++)
7324 addr = plus_constant (from, i * 4 * sign);
7325 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7326 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7332 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7342 result = gen_rtx_PARALLEL (VOIDmode,
7343 rtvec_alloc (count + (write_back ? 1 : 0)));
7346 XVECEXP (result, 0, 0)
7347 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7352 for (j = 0; i < count; i++, j++)
7354 addr = plus_constant (from, j * 4 * sign);
7355 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7356 XVECEXP (result, 0, i)
7357 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7368 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7369 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7371 HOST_WIDE_INT offset = *offsetp;
7374 int sign = up ? 1 : -1;
7377 /* See arm_gen_load_multiple for discussion of
7378 the pros/cons of ldm/stm usage for XScale. */
7379 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7385 for (i = 0; i < count; i++)
7387 addr = plus_constant (to, i * 4 * sign);
7388 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7389 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7395 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7405 result = gen_rtx_PARALLEL (VOIDmode,
7406 rtvec_alloc (count + (write_back ? 1 : 0)));
7409 XVECEXP (result, 0, 0)
7410 = gen_rtx_SET (VOIDmode, to,
7411 plus_constant (to, count * 4 * sign));
7416 for (j = 0; i < count; i++, j++)
7418 addr = plus_constant (to, j * 4 * sign);
7419 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7420 XVECEXP (result, 0, i)
7421 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7432 arm_gen_movmemqi (rtx *operands)
7434 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7435 HOST_WIDE_INT srcoffset, dstoffset;
7437 rtx src, dst, srcbase, dstbase;
7438 rtx part_bytes_reg = NULL;
7441 if (GET_CODE (operands[2]) != CONST_INT
7442 || GET_CODE (operands[3]) != CONST_INT
7443 || INTVAL (operands[2]) > 64
7444 || INTVAL (operands[3]) & 3)
7447 dstbase = operands[0];
7448 srcbase = operands[1];
7450 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7451 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7453 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7454 out_words_to_go = INTVAL (operands[2]) / 4;
7455 last_bytes = INTVAL (operands[2]) & 3;
7456 dstoffset = srcoffset = 0;
7458 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7459 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7461 for (i = 0; in_words_to_go >= 2; i+=4)
7463 if (in_words_to_go > 4)
7464 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7465 srcbase, &srcoffset));
7467 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7468 FALSE, srcbase, &srcoffset));
7470 if (out_words_to_go)
7472 if (out_words_to_go > 4)
7473 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7474 dstbase, &dstoffset));
7475 else if (out_words_to_go != 1)
7476 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7480 dstbase, &dstoffset));
7483 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7484 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7485 if (last_bytes != 0)
7487 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7493 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7494 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7497 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7498 if (out_words_to_go)
7502 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7503 sreg = copy_to_reg (mem);
7505 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7506 emit_move_insn (mem, sreg);
7509 gcc_assert (!in_words_to_go); /* Sanity check */
7514 gcc_assert (in_words_to_go > 0);
7516 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7517 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7520 gcc_assert (!last_bytes || part_bytes_reg);
7522 if (BYTES_BIG_ENDIAN && last_bytes)
7524 rtx tmp = gen_reg_rtx (SImode);
7526 /* The bytes we want are in the top end of the word. */
7527 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7528 GEN_INT (8 * (4 - last_bytes))));
7529 part_bytes_reg = tmp;
7533 mem = adjust_automodify_address (dstbase, QImode,
7534 plus_constant (dst, last_bytes - 1),
7535 dstoffset + last_bytes - 1);
7536 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7540 tmp = gen_reg_rtx (SImode);
7541 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7542 part_bytes_reg = tmp;
7551 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7552 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7556 rtx tmp = gen_reg_rtx (SImode);
7557 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7558 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7559 part_bytes_reg = tmp;
7566 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7567 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7574 /* Select a dominance comparison mode if possible for a test of the general
7575 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7576 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7577 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7578 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7579 In all cases OP will be either EQ or NE, but we don't need to know which
7580 here. If we are unable to support a dominance comparison we return
7581 CC mode. This will then fail to match for the RTL expressions that
7582 generate this call. */
7584 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7586 enum rtx_code cond1, cond2;
7589 /* Currently we will probably get the wrong result if the individual
7590 comparisons are not simple. This also ensures that it is safe to
7591 reverse a comparison if necessary. */
7592 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7594 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7598 /* The if_then_else variant of this tests the second condition if the
7599 first passes, but is true if the first fails. Reverse the first
7600 condition to get a true "inclusive-or" expression. */
7601 if (cond_or == DOM_CC_NX_OR_Y)
7602 cond1 = reverse_condition (cond1);
7604 /* If the comparisons are not equal, and one doesn't dominate the other,
7605 then we can't do this. */
7607 && !comparison_dominates_p (cond1, cond2)
7608 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7613 enum rtx_code temp = cond1;
7621 if (cond_or == DOM_CC_X_AND_Y)
7626 case EQ: return CC_DEQmode;
7627 case LE: return CC_DLEmode;
7628 case LEU: return CC_DLEUmode;
7629 case GE: return CC_DGEmode;
7630 case GEU: return CC_DGEUmode;
7631 default: gcc_unreachable ();
7635 if (cond_or == DOM_CC_X_AND_Y)
7651 if (cond_or == DOM_CC_X_AND_Y)
7667 if (cond_or == DOM_CC_X_AND_Y)
7683 if (cond_or == DOM_CC_X_AND_Y)
7698 /* The remaining cases only occur when both comparisons are the
7701 gcc_assert (cond1 == cond2);
7705 gcc_assert (cond1 == cond2);
7709 gcc_assert (cond1 == cond2);
7713 gcc_assert (cond1 == cond2);
7717 gcc_assert (cond1 == cond2);
7726 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7728 /* All floating point compares return CCFP if it is an equality
7729 comparison, and CCFPE otherwise. */
7730 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7750 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7759 /* A compare with a shifted operand. Because of canonicalization, the
7760 comparison will have to be swapped when we emit the assembler. */
7761 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7762 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7763 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7764 || GET_CODE (x) == ROTATERT))
7767 /* This operation is performed swapped, but since we only rely on the Z
7768 flag we don't need an additional mode. */
7769 if (GET_MODE (y) == SImode && REG_P (y)
7770 && GET_CODE (x) == NEG
7771 && (op == EQ || op == NE))
7774 /* This is a special case that is used by combine to allow a
7775 comparison of a shifted byte load to be split into a zero-extend
7776 followed by a comparison of the shifted integer (only valid for
7777 equalities and unsigned inequalities). */
7778 if (GET_MODE (x) == SImode
7779 && GET_CODE (x) == ASHIFT
7780 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7781 && GET_CODE (XEXP (x, 0)) == SUBREG
7782 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7783 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7784 && (op == EQ || op == NE
7785 || op == GEU || op == GTU || op == LTU || op == LEU)
7786 && GET_CODE (y) == CONST_INT)
7789 /* A construct for a conditional compare, if the false arm contains
7790 0, then both conditions must be true, otherwise either condition
7791 must be true. Not all conditions are possible, so CCmode is
7792 returned if it can't be done. */
7793 if (GET_CODE (x) == IF_THEN_ELSE
7794 && (XEXP (x, 2) == const0_rtx
7795 || XEXP (x, 2) == const1_rtx)
7796 && COMPARISON_P (XEXP (x, 0))
7797 && COMPARISON_P (XEXP (x, 1)))
7798 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7799 INTVAL (XEXP (x, 2)));
7801 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7802 if (GET_CODE (x) == AND
7803 && COMPARISON_P (XEXP (x, 0))
7804 && COMPARISON_P (XEXP (x, 1)))
7805 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7808 if (GET_CODE (x) == IOR
7809 && COMPARISON_P (XEXP (x, 0))
7810 && COMPARISON_P (XEXP (x, 1)))
7811 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7814 /* An operation (on Thumb) where we want to test for a single bit.
7815 This is done by shifting that bit up into the top bit of a
7816 scratch register; we can then branch on the sign bit. */
7818 && GET_MODE (x) == SImode
7819 && (op == EQ || op == NE)
7820 && GET_CODE (x) == ZERO_EXTRACT
7821 && XEXP (x, 1) == const1_rtx)
7824 /* An operation that sets the condition codes as a side-effect, the
7825 V flag is not set correctly, so we can only use comparisons where
7826 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7828 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7829 if (GET_MODE (x) == SImode
7831 && (op == EQ || op == NE || op == LT || op == GE)
7832 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7833 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7834 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7835 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7836 || GET_CODE (x) == LSHIFTRT
7837 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7838 || GET_CODE (x) == ROTATERT
7839 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7842 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7845 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7846 && GET_CODE (x) == PLUS
7847 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7853 /* X and Y are two things to compare using CODE. Emit the compare insn and
7854 return the rtx for register 0 in the proper mode. FP means this is a
7855 floating point compare: I don't think that it is needed on the arm. */
7857 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7859 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7860 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7862 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7867 /* Generate a sequence of insns that will generate the correct return
7868 address mask depending on the physical architecture that the program
7871 arm_gen_return_addr_mask (void)
7873 rtx reg = gen_reg_rtx (Pmode);
7875 emit_insn (gen_return_addr_mask (reg));
7880 arm_reload_in_hi (rtx *operands)
7882 rtx ref = operands[1];
7884 HOST_WIDE_INT offset = 0;
7886 if (GET_CODE (ref) == SUBREG)
7888 offset = SUBREG_BYTE (ref);
7889 ref = SUBREG_REG (ref);
7892 if (GET_CODE (ref) == REG)
7894 /* We have a pseudo which has been spilt onto the stack; there
7895 are two cases here: the first where there is a simple
7896 stack-slot replacement and a second where the stack-slot is
7897 out of range, or is used as a subreg. */
7898 if (reg_equiv_mem[REGNO (ref)])
7900 ref = reg_equiv_mem[REGNO (ref)];
7901 base = find_replacement (&XEXP (ref, 0));
7904 /* The slot is out of range, or was dressed up in a SUBREG. */
7905 base = reg_equiv_address[REGNO (ref)];
7908 base = find_replacement (&XEXP (ref, 0));
7910 /* Handle the case where the address is too complex to be offset by 1. */
7911 if (GET_CODE (base) == MINUS
7912 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7914 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7916 emit_set_insn (base_plus, base);
7919 else if (GET_CODE (base) == PLUS)
7921 /* The addend must be CONST_INT, or we would have dealt with it above. */
7922 HOST_WIDE_INT hi, lo;
7924 offset += INTVAL (XEXP (base, 1));
7925 base = XEXP (base, 0);
7927 /* Rework the address into a legal sequence of insns. */
7928 /* Valid range for lo is -4095 -> 4095 */
7931 : -((-offset) & 0xfff));
7933 /* Corner case, if lo is the max offset then we would be out of range
7934 once we have added the additional 1 below, so bump the msb into the
7935 pre-loading insn(s). */
7939 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7940 ^ (HOST_WIDE_INT) 0x80000000)
7941 - (HOST_WIDE_INT) 0x80000000);
7943 gcc_assert (hi + lo == offset);
7947 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7949 /* Get the base address; addsi3 knows how to handle constants
7950 that require more than one insn. */
7951 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7957 /* Operands[2] may overlap operands[0] (though it won't overlap
7958 operands[1]), that's why we asked for a DImode reg -- so we can
7959 use the bit that does not overlap. */
7960 if (REGNO (operands[2]) == REGNO (operands[0]))
7961 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7963 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7965 emit_insn (gen_zero_extendqisi2 (scratch,
7966 gen_rtx_MEM (QImode,
7967 plus_constant (base,
7969 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7970 gen_rtx_MEM (QImode,
7971 plus_constant (base,
7973 if (!BYTES_BIG_ENDIAN)
7974 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7975 gen_rtx_IOR (SImode,
7978 gen_rtx_SUBREG (SImode, operands[0], 0),
7982 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7983 gen_rtx_IOR (SImode,
7984 gen_rtx_ASHIFT (SImode, scratch,
7986 gen_rtx_SUBREG (SImode, operands[0], 0)));
7989 /* Handle storing a half-word to memory during reload by synthesizing as two
7990 byte stores. Take care not to clobber the input values until after we
7991 have moved them somewhere safe. This code assumes that if the DImode
7992 scratch in operands[2] overlaps either the input value or output address
7993 in some way, then that value must die in this insn (we absolutely need
7994 two scratch registers for some corner cases). */
7996 arm_reload_out_hi (rtx *operands)
7998 rtx ref = operands[0];
7999 rtx outval = operands[1];
8001 HOST_WIDE_INT offset = 0;
8003 if (GET_CODE (ref) == SUBREG)
8005 offset = SUBREG_BYTE (ref);
8006 ref = SUBREG_REG (ref);
8009 if (GET_CODE (ref) == REG)
8011 /* We have a pseudo which has been spilt onto the stack; there
8012 are two cases here: the first where there is a simple
8013 stack-slot replacement and a second where the stack-slot is
8014 out of range, or is used as a subreg. */
8015 if (reg_equiv_mem[REGNO (ref)])
8017 ref = reg_equiv_mem[REGNO (ref)];
8018 base = find_replacement (&XEXP (ref, 0));
8021 /* The slot is out of range, or was dressed up in a SUBREG. */
8022 base = reg_equiv_address[REGNO (ref)];
8025 base = find_replacement (&XEXP (ref, 0));
8027 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8029 /* Handle the case where the address is too complex to be offset by 1. */
8030 if (GET_CODE (base) == MINUS
8031 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8033 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8035 /* Be careful not to destroy OUTVAL. */
8036 if (reg_overlap_mentioned_p (base_plus, outval))
8038 /* Updating base_plus might destroy outval, see if we can
8039 swap the scratch and base_plus. */
8040 if (!reg_overlap_mentioned_p (scratch, outval))
8043 scratch = base_plus;
8048 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8050 /* Be conservative and copy OUTVAL into the scratch now,
8051 this should only be necessary if outval is a subreg
8052 of something larger than a word. */
8053 /* XXX Might this clobber base? I can't see how it can,
8054 since scratch is known to overlap with OUTVAL, and
8055 must be wider than a word. */
8056 emit_insn (gen_movhi (scratch_hi, outval));
8057 outval = scratch_hi;
8061 emit_set_insn (base_plus, base);
8064 else if (GET_CODE (base) == PLUS)
8066 /* The addend must be CONST_INT, or we would have dealt with it above. */
8067 HOST_WIDE_INT hi, lo;
8069 offset += INTVAL (XEXP (base, 1));
8070 base = XEXP (base, 0);
8072 /* Rework the address into a legal sequence of insns. */
8073 /* Valid range for lo is -4095 -> 4095 */
8076 : -((-offset) & 0xfff));
8078 /* Corner case, if lo is the max offset then we would be out of range
8079 once we have added the additional 1 below, so bump the msb into the
8080 pre-loading insn(s). */
8084 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8085 ^ (HOST_WIDE_INT) 0x80000000)
8086 - (HOST_WIDE_INT) 0x80000000);
8088 gcc_assert (hi + lo == offset);
8092 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8094 /* Be careful not to destroy OUTVAL. */
8095 if (reg_overlap_mentioned_p (base_plus, outval))
8097 /* Updating base_plus might destroy outval, see if we
8098 can swap the scratch and base_plus. */
8099 if (!reg_overlap_mentioned_p (scratch, outval))
8102 scratch = base_plus;
8107 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8109 /* Be conservative and copy outval into scratch now,
8110 this should only be necessary if outval is a
8111 subreg of something larger than a word. */
8112 /* XXX Might this clobber base? I can't see how it
8113 can, since scratch is known to overlap with
8115 emit_insn (gen_movhi (scratch_hi, outval));
8116 outval = scratch_hi;
8120 /* Get the base address; addsi3 knows how to handle constants
8121 that require more than one insn. */
8122 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8128 if (BYTES_BIG_ENDIAN)
8130 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8131 plus_constant (base, offset + 1)),
8132 gen_lowpart (QImode, outval)));
8133 emit_insn (gen_lshrsi3 (scratch,
8134 gen_rtx_SUBREG (SImode, outval, 0),
8136 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8137 gen_lowpart (QImode, scratch)));
8141 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8142 gen_lowpart (QImode, outval)));
8143 emit_insn (gen_lshrsi3 (scratch,
8144 gen_rtx_SUBREG (SImode, outval, 0),
8146 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8147 plus_constant (base, offset + 1)),
8148 gen_lowpart (QImode, scratch)));
8152 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8153 (padded to the size of a word) should be passed in a register. */
8156 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8158 if (TARGET_AAPCS_BASED)
8159 return must_pass_in_stack_var_size (mode, type);
8161 return must_pass_in_stack_var_size_or_pad (mode, type);
8165 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8166 Return true if an argument passed on the stack should be padded upwards,
8167 i.e. if the least-significant byte has useful data.
8168 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8169 aggregate types are placed in the lowest memory address. */
8172 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8174 if (!TARGET_AAPCS_BASED)
8175 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8177 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8184 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8185 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8186 byte of the register has useful data, and return the opposite if the
8187 most significant byte does.
8188 For AAPCS, small aggregates and small complex types are always padded
8192 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8193 tree type, int first ATTRIBUTE_UNUSED)
8195 if (TARGET_AAPCS_BASED
8197 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8198 && int_size_in_bytes (type) <= 4)
8201 /* Otherwise, use default padding. */
8202 return !BYTES_BIG_ENDIAN;
8206 /* Print a symbolic form of X to the debug file, F. */
8208 arm_print_value (FILE *f, rtx x)
8210 switch (GET_CODE (x))
8213 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8217 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8225 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8227 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8228 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8236 fprintf (f, "\"%s\"", XSTR (x, 0));
8240 fprintf (f, "`%s'", XSTR (x, 0));
8244 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8248 arm_print_value (f, XEXP (x, 0));
8252 arm_print_value (f, XEXP (x, 0));
8254 arm_print_value (f, XEXP (x, 1));
8262 fprintf (f, "????");
8267 /* Routines for manipulation of the constant pool. */
8269 /* Arm instructions cannot load a large constant directly into a
8270 register; they have to come from a pc relative load. The constant
8271 must therefore be placed in the addressable range of the pc
8272 relative load. Depending on the precise pc relative load
8273 instruction the range is somewhere between 256 bytes and 4k. This
8274 means that we often have to dump a constant inside a function, and
8275 generate code to branch around it.
8277 It is important to minimize this, since the branches will slow
8278 things down and make the code larger.
8280 Normally we can hide the table after an existing unconditional
8281 branch so that there is no interruption of the flow, but in the
8282 worst case the code looks like this:
8300 We fix this by performing a scan after scheduling, which notices
8301 which instructions need to have their operands fetched from the
8302 constant table and builds the table.
8304 The algorithm starts by building a table of all the constants that
8305 need fixing up and all the natural barriers in the function (places
8306 where a constant table can be dropped without breaking the flow).
8307 For each fixup we note how far the pc-relative replacement will be
8308 able to reach and the offset of the instruction into the function.
8310 Having built the table we then group the fixes together to form
8311 tables that are as large as possible (subject to addressing
8312 constraints) and emit each table of constants after the last
8313 barrier that is within range of all the instructions in the group.
8314 If a group does not contain a barrier, then we forcibly create one
8315 by inserting a jump instruction into the flow. Once the table has
8316 been inserted, the insns are then modified to reference the
8317 relevant entry in the pool.
8319 Possible enhancements to the algorithm (not implemented) are:
8321 1) For some processors and object formats, there may be benefit in
8322 aligning the pools to the start of cache lines; this alignment
8323 would need to be taken into account when calculating addressability
8326 /* These typedefs are located at the start of this file, so that
8327 they can be used in the prototypes there. This comment is to
8328 remind readers of that fact so that the following structures
8329 can be understood more easily.
8331 typedef struct minipool_node Mnode;
8332 typedef struct minipool_fixup Mfix; */
8334 struct minipool_node
8336 /* Doubly linked chain of entries. */
8339 /* The maximum offset into the code that this entry can be placed. While
8340 pushing fixes for forward references, all entries are sorted in order
8341 of increasing max_address. */
8342 HOST_WIDE_INT max_address;
8343 /* Similarly for an entry inserted for a backwards ref. */
8344 HOST_WIDE_INT min_address;
8345 /* The number of fixes referencing this entry. This can become zero
8346 if we "unpush" an entry. In this case we ignore the entry when we
8347 come to emit the code. */
8349 /* The offset from the start of the minipool. */
8350 HOST_WIDE_INT offset;
8351 /* The value in table. */
8353 /* The mode of value. */
8354 enum machine_mode mode;
8355 /* The size of the value. With iWMMXt enabled
8356 sizes > 4 also imply an alignment of 8-bytes. */
8360 struct minipool_fixup
8364 HOST_WIDE_INT address;
8366 enum machine_mode mode;
8370 HOST_WIDE_INT forwards;
8371 HOST_WIDE_INT backwards;
8374 /* Fixes less than a word need padding out to a word boundary. */
8375 #define MINIPOOL_FIX_SIZE(mode) \
8376 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
8378 static Mnode * minipool_vector_head;
8379 static Mnode * minipool_vector_tail;
8380 static rtx minipool_vector_label;
8381 static int minipool_pad;
8383 /* The linked list of all minipool fixes required for this function. */
8384 Mfix * minipool_fix_head;
8385 Mfix * minipool_fix_tail;
8386 /* The fix entry for the current minipool, once it has been placed. */
8387 Mfix * minipool_barrier;
8389 /* Determines if INSN is the start of a jump table. Returns the end
8390 of the TABLE or NULL_RTX. */
8392 is_jump_table (rtx insn)
8396 if (GET_CODE (insn) == JUMP_INSN
8397 && JUMP_LABEL (insn) != NULL
8398 && ((table = next_real_insn (JUMP_LABEL (insn)))
8399 == next_real_insn (insn))
8401 && GET_CODE (table) == JUMP_INSN
8402 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8403 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8409 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8410 #define JUMP_TABLES_IN_TEXT_SECTION 0
8413 static HOST_WIDE_INT
8414 get_jump_table_size (rtx insn)
8416 /* ADDR_VECs only take room if read-only data does into the text
8418 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8420 rtx body = PATTERN (insn);
8421 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8423 HOST_WIDE_INT modesize;
8425 modesize = GET_MODE_SIZE (GET_MODE (body));
8426 size = modesize * XVECLEN (body, elt);
8430 /* Round up size of TBB table to a halfword boundary. */
8431 size = (size + 1) & ~(HOST_WIDE_INT)1;
8434 /* No padding necessary for TBH. */
8437 /* Add two bytes for alignment on Thumb. */
8450 /* Move a minipool fix MP from its current location to before MAX_MP.
8451 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8452 constraints may need updating. */
8454 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8455 HOST_WIDE_INT max_address)
8457 /* The code below assumes these are different. */
8458 gcc_assert (mp != max_mp);
8462 if (max_address < mp->max_address)
8463 mp->max_address = max_address;
8467 if (max_address > max_mp->max_address - mp->fix_size)
8468 mp->max_address = max_mp->max_address - mp->fix_size;
8470 mp->max_address = max_address;
8472 /* Unlink MP from its current position. Since max_mp is non-null,
8473 mp->prev must be non-null. */
8474 mp->prev->next = mp->next;
8475 if (mp->next != NULL)
8476 mp->next->prev = mp->prev;
8478 minipool_vector_tail = mp->prev;
8480 /* Re-insert it before MAX_MP. */
8482 mp->prev = max_mp->prev;
8485 if (mp->prev != NULL)
8486 mp->prev->next = mp;
8488 minipool_vector_head = mp;
8491 /* Save the new entry. */
8494 /* Scan over the preceding entries and adjust their addresses as
8496 while (mp->prev != NULL
8497 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8499 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8506 /* Add a constant to the minipool for a forward reference. Returns the
8507 node added or NULL if the constant will not fit in this pool. */
8509 add_minipool_forward_ref (Mfix *fix)
8511 /* If set, max_mp is the first pool_entry that has a lower
8512 constraint than the one we are trying to add. */
8513 Mnode * max_mp = NULL;
8514 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8517 /* If the minipool starts before the end of FIX->INSN then this FIX
8518 can not be placed into the current pool. Furthermore, adding the
8519 new constant pool entry may cause the pool to start FIX_SIZE bytes
8521 if (minipool_vector_head &&
8522 (fix->address + get_attr_length (fix->insn)
8523 >= minipool_vector_head->max_address - fix->fix_size))
8526 /* Scan the pool to see if a constant with the same value has
8527 already been added. While we are doing this, also note the
8528 location where we must insert the constant if it doesn't already
8530 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8532 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8533 && fix->mode == mp->mode
8534 && (GET_CODE (fix->value) != CODE_LABEL
8535 || (CODE_LABEL_NUMBER (fix->value)
8536 == CODE_LABEL_NUMBER (mp->value)))
8537 && rtx_equal_p (fix->value, mp->value))
8539 /* More than one fix references this entry. */
8541 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8544 /* Note the insertion point if necessary. */
8546 && mp->max_address > max_address)
8549 /* If we are inserting an 8-bytes aligned quantity and
8550 we have not already found an insertion point, then
8551 make sure that all such 8-byte aligned quantities are
8552 placed at the start of the pool. */
8553 if (ARM_DOUBLEWORD_ALIGN
8555 && fix->fix_size >= 8
8556 && mp->fix_size < 8)
8559 max_address = mp->max_address;
8563 /* The value is not currently in the minipool, so we need to create
8564 a new entry for it. If MAX_MP is NULL, the entry will be put on
8565 the end of the list since the placement is less constrained than
8566 any existing entry. Otherwise, we insert the new fix before
8567 MAX_MP and, if necessary, adjust the constraints on the other
8570 mp->fix_size = fix->fix_size;
8571 mp->mode = fix->mode;
8572 mp->value = fix->value;
8574 /* Not yet required for a backwards ref. */
8575 mp->min_address = -65536;
8579 mp->max_address = max_address;
8581 mp->prev = minipool_vector_tail;
8583 if (mp->prev == NULL)
8585 minipool_vector_head = mp;
8586 minipool_vector_label = gen_label_rtx ();
8589 mp->prev->next = mp;
8591 minipool_vector_tail = mp;
8595 if (max_address > max_mp->max_address - mp->fix_size)
8596 mp->max_address = max_mp->max_address - mp->fix_size;
8598 mp->max_address = max_address;
8601 mp->prev = max_mp->prev;
8603 if (mp->prev != NULL)
8604 mp->prev->next = mp;
8606 minipool_vector_head = mp;
8609 /* Save the new entry. */
8612 /* Scan over the preceding entries and adjust their addresses as
8614 while (mp->prev != NULL
8615 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8617 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8625 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8626 HOST_WIDE_INT min_address)
8628 HOST_WIDE_INT offset;
8630 /* The code below assumes these are different. */
8631 gcc_assert (mp != min_mp);
8635 if (min_address > mp->min_address)
8636 mp->min_address = min_address;
8640 /* We will adjust this below if it is too loose. */
8641 mp->min_address = min_address;
8643 /* Unlink MP from its current position. Since min_mp is non-null,
8644 mp->next must be non-null. */
8645 mp->next->prev = mp->prev;
8646 if (mp->prev != NULL)
8647 mp->prev->next = mp->next;
8649 minipool_vector_head = mp->next;
8651 /* Reinsert it after MIN_MP. */
8653 mp->next = min_mp->next;
8655 if (mp->next != NULL)
8656 mp->next->prev = mp;
8658 minipool_vector_tail = mp;
8664 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8666 mp->offset = offset;
8667 if (mp->refcount > 0)
8668 offset += mp->fix_size;
8670 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8671 mp->next->min_address = mp->min_address + mp->fix_size;
8677 /* Add a constant to the minipool for a backward reference. Returns the
8678 node added or NULL if the constant will not fit in this pool.
8680 Note that the code for insertion for a backwards reference can be
8681 somewhat confusing because the calculated offsets for each fix do
8682 not take into account the size of the pool (which is still under
8685 add_minipool_backward_ref (Mfix *fix)
8687 /* If set, min_mp is the last pool_entry that has a lower constraint
8688 than the one we are trying to add. */
8689 Mnode *min_mp = NULL;
8690 /* This can be negative, since it is only a constraint. */
8691 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8694 /* If we can't reach the current pool from this insn, or if we can't
8695 insert this entry at the end of the pool without pushing other
8696 fixes out of range, then we don't try. This ensures that we
8697 can't fail later on. */
8698 if (min_address >= minipool_barrier->address
8699 || (minipool_vector_tail->min_address + fix->fix_size
8700 >= minipool_barrier->address))
8703 /* Scan the pool to see if a constant with the same value has
8704 already been added. While we are doing this, also note the
8705 location where we must insert the constant if it doesn't already
8707 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8709 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8710 && fix->mode == mp->mode
8711 && (GET_CODE (fix->value) != CODE_LABEL
8712 || (CODE_LABEL_NUMBER (fix->value)
8713 == CODE_LABEL_NUMBER (mp->value)))
8714 && rtx_equal_p (fix->value, mp->value)
8715 /* Check that there is enough slack to move this entry to the
8716 end of the table (this is conservative). */
8718 > (minipool_barrier->address
8719 + minipool_vector_tail->offset
8720 + minipool_vector_tail->fix_size)))
8723 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8727 mp->min_address += fix->fix_size;
8730 /* Note the insertion point if necessary. */
8731 if (mp->min_address < min_address)
8733 /* For now, we do not allow the insertion of 8-byte alignment
8734 requiring nodes anywhere but at the start of the pool. */
8735 if (ARM_DOUBLEWORD_ALIGN
8736 && fix->fix_size >= 8 && mp->fix_size < 8)
8741 else if (mp->max_address
8742 < minipool_barrier->address + mp->offset + fix->fix_size)
8744 /* Inserting before this entry would push the fix beyond
8745 its maximum address (which can happen if we have
8746 re-located a forwards fix); force the new fix to come
8749 min_address = mp->min_address + fix->fix_size;
8751 /* If we are inserting an 8-bytes aligned quantity and
8752 we have not already found an insertion point, then
8753 make sure that all such 8-byte aligned quantities are
8754 placed at the start of the pool. */
8755 else if (ARM_DOUBLEWORD_ALIGN
8757 && fix->fix_size >= 8
8758 && mp->fix_size < 8)
8761 min_address = mp->min_address + fix->fix_size;
8766 /* We need to create a new entry. */
8768 mp->fix_size = fix->fix_size;
8769 mp->mode = fix->mode;
8770 mp->value = fix->value;
8772 mp->max_address = minipool_barrier->address + 65536;
8774 mp->min_address = min_address;
8779 mp->next = minipool_vector_head;
8781 if (mp->next == NULL)
8783 minipool_vector_tail = mp;
8784 minipool_vector_label = gen_label_rtx ();
8787 mp->next->prev = mp;
8789 minipool_vector_head = mp;
8793 mp->next = min_mp->next;
8797 if (mp->next != NULL)
8798 mp->next->prev = mp;
8800 minipool_vector_tail = mp;
8803 /* Save the new entry. */
8811 /* Scan over the following entries and adjust their offsets. */
8812 while (mp->next != NULL)
8814 if (mp->next->min_address < mp->min_address + mp->fix_size)
8815 mp->next->min_address = mp->min_address + mp->fix_size;
8818 mp->next->offset = mp->offset + mp->fix_size;
8820 mp->next->offset = mp->offset;
8829 assign_minipool_offsets (Mfix *barrier)
8831 HOST_WIDE_INT offset = 0;
8834 minipool_barrier = barrier;
8836 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8838 mp->offset = offset;
8840 if (mp->refcount > 0)
8841 offset += mp->fix_size;
8845 /* Output the literal table */
8847 dump_minipool (rtx scan)
8853 if (ARM_DOUBLEWORD_ALIGN)
8854 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8855 if (mp->refcount > 0 && mp->fix_size >= 8)
8863 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8864 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8866 scan = emit_label_after (gen_label_rtx (), scan);
8867 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8868 scan = emit_label_after (minipool_vector_label, scan);
8870 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8872 if (mp->refcount > 0)
8877 ";; Offset %u, min %ld, max %ld ",
8878 (unsigned) mp->offset, (unsigned long) mp->min_address,
8879 (unsigned long) mp->max_address);
8880 arm_print_value (dump_file, mp->value);
8881 fputc ('\n', dump_file);
8884 switch (mp->fix_size)
8886 #ifdef HAVE_consttable_1
8888 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8892 #ifdef HAVE_consttable_2
8894 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8898 #ifdef HAVE_consttable_4
8900 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8904 #ifdef HAVE_consttable_8
8906 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8910 #ifdef HAVE_consttable_16
8912 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8925 minipool_vector_head = minipool_vector_tail = NULL;
8926 scan = emit_insn_after (gen_consttable_end (), scan);
8927 scan = emit_barrier_after (scan);
8930 /* Return the cost of forcibly inserting a barrier after INSN. */
8932 arm_barrier_cost (rtx insn)
8934 /* Basing the location of the pool on the loop depth is preferable,
8935 but at the moment, the basic block information seems to be
8936 corrupt by this stage of the compilation. */
8938 rtx next = next_nonnote_insn (insn);
8940 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8943 switch (GET_CODE (insn))
8946 /* It will always be better to place the table before the label, rather
8955 return base_cost - 10;
8958 return base_cost + 10;
8962 /* Find the best place in the insn stream in the range
8963 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8964 Create the barrier by inserting a jump and add a new fix entry for
8967 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8969 HOST_WIDE_INT count = 0;
8971 rtx from = fix->insn;
8972 /* The instruction after which we will insert the jump. */
8973 rtx selected = NULL;
8975 /* The address at which the jump instruction will be placed. */
8976 HOST_WIDE_INT selected_address;
8978 HOST_WIDE_INT max_count = max_address - fix->address;
8979 rtx label = gen_label_rtx ();
8981 selected_cost = arm_barrier_cost (from);
8982 selected_address = fix->address;
8984 while (from && count < max_count)
8989 /* This code shouldn't have been called if there was a natural barrier
8991 gcc_assert (GET_CODE (from) != BARRIER);
8993 /* Count the length of this insn. */
8994 count += get_attr_length (from);
8996 /* If there is a jump table, add its length. */
8997 tmp = is_jump_table (from);
9000 count += get_jump_table_size (tmp);
9002 /* Jump tables aren't in a basic block, so base the cost on
9003 the dispatch insn. If we select this location, we will
9004 still put the pool after the table. */
9005 new_cost = arm_barrier_cost (from);
9007 if (count < max_count
9008 && (!selected || new_cost <= selected_cost))
9011 selected_cost = new_cost;
9012 selected_address = fix->address + count;
9015 /* Continue after the dispatch table. */
9016 from = NEXT_INSN (tmp);
9020 new_cost = arm_barrier_cost (from);
9022 if (count < max_count
9023 && (!selected || new_cost <= selected_cost))
9026 selected_cost = new_cost;
9027 selected_address = fix->address + count;
9030 from = NEXT_INSN (from);
9033 /* Make sure that we found a place to insert the jump. */
9034 gcc_assert (selected);
9036 /* Create a new JUMP_INSN that branches around a barrier. */
9037 from = emit_jump_insn_after (gen_jump (label), selected);
9038 JUMP_LABEL (from) = label;
9039 barrier = emit_barrier_after (from);
9040 emit_label_after (label, barrier);
9042 /* Create a minipool barrier entry for the new barrier. */
9043 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9044 new_fix->insn = barrier;
9045 new_fix->address = selected_address;
9046 new_fix->next = fix->next;
9047 fix->next = new_fix;
9052 /* Record that there is a natural barrier in the insn stream at
9055 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9057 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9060 fix->address = address;
9063 if (minipool_fix_head != NULL)
9064 minipool_fix_tail->next = fix;
9066 minipool_fix_head = fix;
9068 minipool_fix_tail = fix;
9071 /* Record INSN, which will need fixing up to load a value from the
9072 minipool. ADDRESS is the offset of the insn since the start of the
9073 function; LOC is a pointer to the part of the insn which requires
9074 fixing; VALUE is the constant that must be loaded, which is of type
9077 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9078 enum machine_mode mode, rtx value)
9080 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9083 fix->address = address;
9086 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9088 fix->forwards = get_attr_pool_range (insn);
9089 fix->backwards = get_attr_neg_pool_range (insn);
9090 fix->minipool = NULL;
9092 /* If an insn doesn't have a range defined for it, then it isn't
9093 expecting to be reworked by this code. Better to stop now than
9094 to generate duff assembly code. */
9095 gcc_assert (fix->forwards || fix->backwards);
9097 /* If an entry requires 8-byte alignment then assume all constant pools
9098 require 4 bytes of padding. Trying to do this later on a per-pool
9099 basis is awkward because existing pool entries have to be modified. */
9100 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9106 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9107 GET_MODE_NAME (mode),
9108 INSN_UID (insn), (unsigned long) address,
9109 -1 * (long)fix->backwards, (long)fix->forwards);
9110 arm_print_value (dump_file, fix->value);
9111 fprintf (dump_file, "\n");
9114 /* Add it to the chain of fixes. */
9117 if (minipool_fix_head != NULL)
9118 minipool_fix_tail->next = fix;
9120 minipool_fix_head = fix;
9122 minipool_fix_tail = fix;
9125 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9126 Returns the number of insns needed, or 99 if we don't know how to
9129 arm_const_double_inline_cost (rtx val)
9131 rtx lowpart, highpart;
9132 enum machine_mode mode;
9134 mode = GET_MODE (val);
9136 if (mode == VOIDmode)
9139 gcc_assert (GET_MODE_SIZE (mode) == 8);
9141 lowpart = gen_lowpart (SImode, val);
9142 highpart = gen_highpart_mode (SImode, mode, val);
9144 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9145 gcc_assert (GET_CODE (highpart) == CONST_INT);
9147 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9148 NULL_RTX, NULL_RTX, 0, 0)
9149 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9150 NULL_RTX, NULL_RTX, 0, 0));
9153 /* Return true if it is worthwhile to split a 64-bit constant into two
9154 32-bit operations. This is the case if optimizing for size, or
9155 if we have load delay slots, or if one 32-bit part can be done with
9156 a single data operation. */
9158 arm_const_double_by_parts (rtx val)
9160 enum machine_mode mode = GET_MODE (val);
9163 if (optimize_size || arm_ld_sched)
9166 if (mode == VOIDmode)
9169 part = gen_highpart_mode (SImode, mode, val);
9171 gcc_assert (GET_CODE (part) == CONST_INT);
9173 if (const_ok_for_arm (INTVAL (part))
9174 || const_ok_for_arm (~INTVAL (part)))
9177 part = gen_lowpart (SImode, val);
9179 gcc_assert (GET_CODE (part) == CONST_INT);
9181 if (const_ok_for_arm (INTVAL (part))
9182 || const_ok_for_arm (~INTVAL (part)))
9188 /* Scan INSN and note any of its operands that need fixing.
9189 If DO_PUSHES is false we do not actually push any of the fixups
9190 needed. The function returns TRUE if any fixups were needed/pushed.
9191 This is used by arm_memory_load_p() which needs to know about loads
9192 of constants that will be converted into minipool loads. */
9194 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9196 bool result = false;
9199 extract_insn (insn);
9201 if (!constrain_operands (1))
9202 fatal_insn_not_found (insn);
9204 if (recog_data.n_alternatives == 0)
9207 /* Fill in recog_op_alt with information about the constraints of
9209 preprocess_constraints ();
9211 for (opno = 0; opno < recog_data.n_operands; opno++)
9213 /* Things we need to fix can only occur in inputs. */
9214 if (recog_data.operand_type[opno] != OP_IN)
9217 /* If this alternative is a memory reference, then any mention
9218 of constants in this alternative is really to fool reload
9219 into allowing us to accept one there. We need to fix them up
9220 now so that we output the right code. */
9221 if (recog_op_alt[opno][which_alternative].memory_ok)
9223 rtx op = recog_data.operand[opno];
9225 if (CONSTANT_P (op))
9228 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9229 recog_data.operand_mode[opno], op);
9232 else if (GET_CODE (op) == MEM
9233 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9234 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9238 rtx cop = avoid_constant_pool_reference (op);
9240 /* Casting the address of something to a mode narrower
9241 than a word can cause avoid_constant_pool_reference()
9242 to return the pool reference itself. That's no good to
9243 us here. Lets just hope that we can use the
9244 constant pool value directly. */
9246 cop = get_pool_constant (XEXP (op, 0));
9248 push_minipool_fix (insn, address,
9249 recog_data.operand_loc[opno],
9250 recog_data.operand_mode[opno], cop);
9261 /* Gcc puts the pool in the wrong place for ARM, since we can only
9262 load addresses a limited distance around the pc. We do some
9263 special munging to move the constant pool values to the correct
9264 point in the code. */
9269 HOST_WIDE_INT address = 0;
9272 minipool_fix_head = minipool_fix_tail = NULL;
9274 /* The first insn must always be a note, or the code below won't
9275 scan it properly. */
9276 insn = get_insns ();
9277 gcc_assert (GET_CODE (insn) == NOTE);
9280 /* Scan all the insns and record the operands that will need fixing. */
9281 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9283 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9284 && (arm_cirrus_insn_p (insn)
9285 || GET_CODE (insn) == JUMP_INSN
9286 || arm_memory_load_p (insn)))
9287 cirrus_reorg (insn);
9289 if (GET_CODE (insn) == BARRIER)
9290 push_minipool_barrier (insn, address);
9291 else if (INSN_P (insn))
9295 note_invalid_constants (insn, address, true);
9296 address += get_attr_length (insn);
9298 /* If the insn is a vector jump, add the size of the table
9299 and skip the table. */
9300 if ((table = is_jump_table (insn)) != NULL)
9302 address += get_jump_table_size (table);
9308 fix = minipool_fix_head;
9310 /* Now scan the fixups and perform the required changes. */
9315 Mfix * last_added_fix;
9316 Mfix * last_barrier = NULL;
9319 /* Skip any further barriers before the next fix. */
9320 while (fix && GET_CODE (fix->insn) == BARRIER)
9323 /* No more fixes. */
9327 last_added_fix = NULL;
9329 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9331 if (GET_CODE (ftmp->insn) == BARRIER)
9333 if (ftmp->address >= minipool_vector_head->max_address)
9336 last_barrier = ftmp;
9338 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9341 last_added_fix = ftmp; /* Keep track of the last fix added. */
9344 /* If we found a barrier, drop back to that; any fixes that we
9345 could have reached but come after the barrier will now go in
9346 the next mini-pool. */
9347 if (last_barrier != NULL)
9349 /* Reduce the refcount for those fixes that won't go into this
9351 for (fdel = last_barrier->next;
9352 fdel && fdel != ftmp;
9355 fdel->minipool->refcount--;
9356 fdel->minipool = NULL;
9359 ftmp = last_barrier;
9363 /* ftmp is first fix that we can't fit into this pool and
9364 there no natural barriers that we could use. Insert a
9365 new barrier in the code somewhere between the previous
9366 fix and this one, and arrange to jump around it. */
9367 HOST_WIDE_INT max_address;
9369 /* The last item on the list of fixes must be a barrier, so
9370 we can never run off the end of the list of fixes without
9371 last_barrier being set. */
9374 max_address = minipool_vector_head->max_address;
9375 /* Check that there isn't another fix that is in range that
9376 we couldn't fit into this pool because the pool was
9377 already too large: we need to put the pool before such an
9378 instruction. The pool itself may come just after the
9379 fix because create_fix_barrier also allows space for a
9380 jump instruction. */
9381 if (ftmp->address < max_address)
9382 max_address = ftmp->address + 1;
9384 last_barrier = create_fix_barrier (last_added_fix, max_address);
9387 assign_minipool_offsets (last_barrier);
9391 if (GET_CODE (ftmp->insn) != BARRIER
9392 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9399 /* Scan over the fixes we have identified for this pool, fixing them
9400 up and adding the constants to the pool itself. */
9401 for (this_fix = fix; this_fix && ftmp != this_fix;
9402 this_fix = this_fix->next)
9403 if (GET_CODE (this_fix->insn) != BARRIER)
9406 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9407 minipool_vector_label),
9408 this_fix->minipool->offset);
9409 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9412 dump_minipool (last_barrier->insn);
9416 /* From now on we must synthesize any constants that we can't handle
9417 directly. This can happen if the RTL gets split during final
9418 instruction generation. */
9419 after_arm_reorg = 1;
9421 /* Free the minipool memory. */
9422 obstack_free (&minipool_obstack, minipool_startobj);
9425 /* Routines to output assembly language. */
9427 /* If the rtx is the correct value then return the string of the number.
9428 In this way we can ensure that valid double constants are generated even
9429 when cross compiling. */
9431 fp_immediate_constant (rtx x)
9436 if (!fp_consts_inited)
9439 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9440 for (i = 0; i < 8; i++)
9441 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9442 return strings_fp[i];
9447 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9449 fp_const_from_val (REAL_VALUE_TYPE *r)
9453 if (!fp_consts_inited)
9456 for (i = 0; i < 8; i++)
9457 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9458 return strings_fp[i];
9463 /* Output the operands of a LDM/STM instruction to STREAM.
9464 MASK is the ARM register set mask of which only bits 0-15 are important.
9465 REG is the base register, either the frame pointer or the stack pointer,
9466 INSTR is the possibly suffixed load or store instruction.
9467 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9470 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9471 unsigned long mask, int rfe)
9474 bool not_first = FALSE;
9476 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9477 fputc ('\t', stream);
9478 asm_fprintf (stream, instr, reg);
9479 fputc ('{', stream);
9481 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9482 if (mask & (1 << i))
9485 fprintf (stream, ", ");
9487 asm_fprintf (stream, "%r", i);
9492 fprintf (stream, "}^\n");
9494 fprintf (stream, "}\n");
9498 /* Output a FLDMD instruction to STREAM.
9499 BASE if the register containing the address.
9500 REG and COUNT specify the register range.
9501 Extra registers may be added to avoid hardware bugs.
9503 We output FLDMD even for ARMv5 VFP implementations. Although
9504 FLDMD is technically not supported until ARMv6, it is believed
9505 that all VFP implementations support its use in this context. */
9508 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9512 /* Workaround ARM10 VFPr1 bug. */
9513 if (count == 2 && !arm_arch6)
9520 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9521 load into multiple parts if we have to handle more than 16 registers. */
9524 vfp_output_fldmd (stream, base, reg, 16);
9525 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9529 fputc ('\t', stream);
9530 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9532 for (i = reg; i < reg + count; i++)
9535 fputs (", ", stream);
9536 asm_fprintf (stream, "d%d", i);
9538 fputs ("}\n", stream);
9543 /* Output the assembly for a store multiple. */
9546 vfp_output_fstmd (rtx * operands)
9553 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9554 p = strlen (pattern);
9556 gcc_assert (GET_CODE (operands[1]) == REG);
9558 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9559 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9561 p += sprintf (&pattern[p], ", d%d", base + i);
9563 strcpy (&pattern[p], "}");
9565 output_asm_insn (pattern, operands);
9570 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9571 number of bytes pushed. */
9574 vfp_emit_fstmd (int base_reg, int count)
9581 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9582 register pairs are stored by a store multiple insn. We avoid this
9583 by pushing an extra pair. */
9584 if (count == 2 && !arm_arch6)
9586 if (base_reg == LAST_VFP_REGNUM - 3)
9591 /* FSTMD may not store more than 16 doubleword registers at once. Split
9592 larger stores into multiple parts (up to a maximum of two, in
9597 /* NOTE: base_reg is an internal register number, so each D register
9599 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9600 saved += vfp_emit_fstmd (base_reg, 16);
9604 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9605 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9607 reg = gen_rtx_REG (DFmode, base_reg);
9611 = gen_rtx_SET (VOIDmode,
9612 gen_frame_mem (BLKmode,
9613 gen_rtx_PRE_DEC (BLKmode,
9614 stack_pointer_rtx)),
9615 gen_rtx_UNSPEC (BLKmode,
9619 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9620 plus_constant (stack_pointer_rtx, -(count * 8)));
9621 RTX_FRAME_RELATED_P (tmp) = 1;
9622 XVECEXP (dwarf, 0, 0) = tmp;
9624 tmp = gen_rtx_SET (VOIDmode,
9625 gen_frame_mem (DFmode, stack_pointer_rtx),
9627 RTX_FRAME_RELATED_P (tmp) = 1;
9628 XVECEXP (dwarf, 0, 1) = tmp;
9630 for (i = 1; i < count; i++)
9632 reg = gen_rtx_REG (DFmode, base_reg);
9634 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9636 tmp = gen_rtx_SET (VOIDmode,
9637 gen_frame_mem (DFmode,
9638 plus_constant (stack_pointer_rtx,
9641 RTX_FRAME_RELATED_P (tmp) = 1;
9642 XVECEXP (dwarf, 0, i + 1) = tmp;
9645 par = emit_insn (par);
9646 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9648 RTX_FRAME_RELATED_P (par) = 1;
9653 /* Emit a call instruction with pattern PAT. ADDR is the address of
9657 arm_emit_call_insn (rtx pat, rtx addr)
9661 insn = emit_call_insn (pat);
9663 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9664 If the call might use such an entry, add a use of the PIC register
9665 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9666 if (TARGET_VXWORKS_RTP
9668 && GET_CODE (addr) == SYMBOL_REF
9669 && (SYMBOL_REF_DECL (addr)
9670 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9671 : !SYMBOL_REF_LOCAL_P (addr)))
9673 require_pic_register ();
9674 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9678 /* Output a 'call' insn. */
9680 output_call (rtx *operands)
9682 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9684 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9685 if (REGNO (operands[0]) == LR_REGNUM)
9687 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9688 output_asm_insn ("mov%?\t%0, %|lr", operands);
9691 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9693 if (TARGET_INTERWORK || arm_arch4t)
9694 output_asm_insn ("bx%?\t%0", operands);
9696 output_asm_insn ("mov%?\t%|pc, %0", operands);
9701 /* Output a 'call' insn that is a reference in memory. */
9703 output_call_mem (rtx *operands)
9705 if (TARGET_INTERWORK && !arm_arch5)
9707 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9708 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9709 output_asm_insn ("bx%?\t%|ip", operands);
9711 else if (regno_use_in (LR_REGNUM, operands[0]))
9713 /* LR is used in the memory address. We load the address in the
9714 first instruction. It's safe to use IP as the target of the
9715 load since the call will kill it anyway. */
9716 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9718 output_asm_insn ("blx%?\t%|ip", operands);
9721 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9723 output_asm_insn ("bx%?\t%|ip", operands);
9725 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9730 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9731 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9738 /* Output a move from arm registers to an fpa registers.
9739 OPERANDS[0] is an fpa register.
9740 OPERANDS[1] is the first registers of an arm register pair. */
9742 output_mov_long_double_fpa_from_arm (rtx *operands)
9744 int arm_reg0 = REGNO (operands[1]);
9747 gcc_assert (arm_reg0 != IP_REGNUM);
9749 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9750 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9751 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9753 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9754 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9759 /* Output a move from an fpa register to arm registers.
9760 OPERANDS[0] is the first registers of an arm register pair.
9761 OPERANDS[1] is an fpa register. */
9763 output_mov_long_double_arm_from_fpa (rtx *operands)
9765 int arm_reg0 = REGNO (operands[0]);
9768 gcc_assert (arm_reg0 != IP_REGNUM);
9770 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9771 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9772 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9774 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9775 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9779 /* Output a move from arm registers to arm registers of a long double
9780 OPERANDS[0] is the destination.
9781 OPERANDS[1] is the source. */
9783 output_mov_long_double_arm_from_arm (rtx *operands)
9785 /* We have to be careful here because the two might overlap. */
9786 int dest_start = REGNO (operands[0]);
9787 int src_start = REGNO (operands[1]);
9791 if (dest_start < src_start)
9793 for (i = 0; i < 3; i++)
9795 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9796 ops[1] = gen_rtx_REG (SImode, src_start + i);
9797 output_asm_insn ("mov%?\t%0, %1", ops);
9802 for (i = 2; i >= 0; i--)
9804 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9805 ops[1] = gen_rtx_REG (SImode, src_start + i);
9806 output_asm_insn ("mov%?\t%0, %1", ops);
9814 /* Output a move from arm registers to an fpa registers.
9815 OPERANDS[0] is an fpa register.
9816 OPERANDS[1] is the first registers of an arm register pair. */
9818 output_mov_double_fpa_from_arm (rtx *operands)
9820 int arm_reg0 = REGNO (operands[1]);
9823 gcc_assert (arm_reg0 != IP_REGNUM);
9825 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9826 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9827 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9828 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9832 /* Output a move from an fpa register to arm registers.
9833 OPERANDS[0] is the first registers of an arm register pair.
9834 OPERANDS[1] is an fpa register. */
9836 output_mov_double_arm_from_fpa (rtx *operands)
9838 int arm_reg0 = REGNO (operands[0]);
9841 gcc_assert (arm_reg0 != IP_REGNUM);
9843 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9844 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9845 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9846 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9850 /* Output a move between double words.
9851 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9852 or MEM<-REG and all MEMs must be offsettable addresses. */
9854 output_move_double (rtx *operands)
9856 enum rtx_code code0 = GET_CODE (operands[0]);
9857 enum rtx_code code1 = GET_CODE (operands[1]);
9862 int reg0 = REGNO (operands[0]);
9864 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9866 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9868 switch (GET_CODE (XEXP (operands[1], 0)))
9871 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9875 gcc_assert (TARGET_LDRD);
9876 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9881 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9883 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9887 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9891 gcc_assert (TARGET_LDRD);
9892 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9897 otherops[0] = operands[0];
9898 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9899 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9901 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9903 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9905 /* Registers overlap so split out the increment. */
9906 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9907 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9911 /* IWMMXT allows offsets larger than ldrd can handle,
9912 fix these up with a pair of ldr. */
9913 if (GET_CODE (otherops[2]) == CONST_INT
9914 && (INTVAL(otherops[2]) <= -256
9915 || INTVAL(otherops[2]) >= 256))
9917 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9918 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9919 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9922 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9927 /* IWMMXT allows offsets larger than ldrd can handle,
9928 fix these up with a pair of ldr. */
9929 if (GET_CODE (otherops[2]) == CONST_INT
9930 && (INTVAL(otherops[2]) <= -256
9931 || INTVAL(otherops[2]) >= 256))
9933 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9934 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9935 otherops[0] = operands[0];
9936 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9939 /* We only allow constant increments, so this is safe. */
9940 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9946 output_asm_insn ("adr%?\t%0, %1", operands);
9947 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9950 /* ??? This needs checking for thumb2. */
9952 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9953 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9955 otherops[0] = operands[0];
9956 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9957 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9959 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9961 if (GET_CODE (otherops[2]) == CONST_INT)
9963 switch ((int) INTVAL (otherops[2]))
9966 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
9971 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
9976 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
9981 && (GET_CODE (otherops[2]) == REG
9982 || (GET_CODE (otherops[2]) == CONST_INT
9983 && INTVAL (otherops[2]) > -256
9984 && INTVAL (otherops[2]) < 256)))
9986 if (reg_overlap_mentioned_p (otherops[0],
9989 /* Swap base and index registers over to
9990 avoid a conflict. */
9991 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
9992 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
9994 /* If both registers conflict, it will usually
9995 have been fixed by a splitter. */
9996 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9998 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9999 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10003 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10007 if (GET_CODE (otherops[2]) == CONST_INT)
10009 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10010 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10012 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10015 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10018 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10020 return "ldm%(ia%)\t%0, %M0";
10024 otherops[1] = adjust_address (operands[1], SImode, 4);
10025 /* Take care of overlapping base/data reg. */
10026 if (reg_mentioned_p (operands[0], operands[1]))
10028 output_asm_insn ("ldr%?\t%0, %1", otherops);
10029 output_asm_insn ("ldr%?\t%0, %1", operands);
10033 output_asm_insn ("ldr%?\t%0, %1", operands);
10034 output_asm_insn ("ldr%?\t%0, %1", otherops);
10041 /* Constraints should ensure this. */
10042 gcc_assert (code0 == MEM && code1 == REG);
10043 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10045 switch (GET_CODE (XEXP (operands[0], 0)))
10048 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10052 gcc_assert (TARGET_LDRD);
10053 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10058 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10060 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10064 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10068 gcc_assert (TARGET_LDRD);
10069 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10074 otherops[0] = operands[1];
10075 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10076 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10078 /* IWMMXT allows offsets larger than ldrd can handle,
10079 fix these up with a pair of ldr. */
10080 if (GET_CODE (otherops[2]) == CONST_INT
10081 && (INTVAL(otherops[2]) <= -256
10082 || INTVAL(otherops[2]) >= 256))
10085 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10086 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10088 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10089 otherops[0] = reg1;
10090 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10094 otherops[0] = reg1;
10095 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10096 otherops[0] = operands[1];
10097 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10100 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10101 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10103 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10107 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10108 if (GET_CODE (otherops[2]) == CONST_INT)
10110 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10113 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10119 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10125 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10130 && (GET_CODE (otherops[2]) == REG
10131 || (GET_CODE (otherops[2]) == CONST_INT
10132 && INTVAL (otherops[2]) > -256
10133 && INTVAL (otherops[2]) < 256)))
10135 otherops[0] = operands[1];
10136 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10137 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10143 otherops[0] = adjust_address (operands[0], SImode, 4);
10144 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10145 output_asm_insn ("str%?\t%1, %0", operands);
10146 output_asm_insn ("str%?\t%1, %0", otherops);
10153 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10154 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10157 output_move_quad (rtx *operands)
10159 if (REG_P (operands[0]))
10161 /* Load, or reg->reg move. */
10163 if (MEM_P (operands[1]))
10165 switch (GET_CODE (XEXP (operands[1], 0)))
10168 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10173 output_asm_insn ("adr%?\t%0, %1", operands);
10174 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10178 gcc_unreachable ();
10186 gcc_assert (REG_P (operands[1]));
10188 dest = REGNO (operands[0]);
10189 src = REGNO (operands[1]);
10191 /* This seems pretty dumb, but hopefully GCC won't try to do it
10194 for (i = 0; i < 4; i++)
10196 ops[0] = gen_rtx_REG (SImode, dest + i);
10197 ops[1] = gen_rtx_REG (SImode, src + i);
10198 output_asm_insn ("mov%?\t%0, %1", ops);
10201 for (i = 3; i >= 0; i--)
10203 ops[0] = gen_rtx_REG (SImode, dest + i);
10204 ops[1] = gen_rtx_REG (SImode, src + i);
10205 output_asm_insn ("mov%?\t%0, %1", ops);
10211 gcc_assert (MEM_P (operands[0]));
10212 gcc_assert (REG_P (operands[1]));
10213 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10215 switch (GET_CODE (XEXP (operands[0], 0)))
10218 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10222 gcc_unreachable ();
10229 /* Output a VFP load or store instruction. */
10232 output_move_vfp (rtx *operands)
10234 rtx reg, mem, addr, ops[2];
10235 int load = REG_P (operands[0]);
10236 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10237 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10238 const char *template;
10240 enum machine_mode mode;
10242 reg = operands[!load];
10243 mem = operands[load];
10245 mode = GET_MODE (reg);
10247 gcc_assert (REG_P (reg));
10248 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10249 gcc_assert (mode == SFmode
10253 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10254 gcc_assert (MEM_P (mem));
10256 addr = XEXP (mem, 0);
10258 switch (GET_CODE (addr))
10261 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10262 ops[0] = XEXP (addr, 0);
10267 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10268 ops[0] = XEXP (addr, 0);
10273 template = "f%s%c%%?\t%%%s0, %%1%s";
10279 sprintf (buff, template,
10280 load ? "ld" : "st",
10283 integer_p ? "\t%@ int" : "");
10284 output_asm_insn (buff, ops);
10289 /* Output a Neon quad-word load or store, or a load or store for
10290 larger structure modes. We could also support post-modify forms using
10291 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10293 WARNING: The ordering of elements in memory is weird in big-endian mode,
10294 because we use VSTM instead of VST1, to make it easy to make vector stores
10295 via ARM registers write values in the same order as stores direct from Neon
10296 registers. For example, the byte ordering of a quadword vector with 16-byte
10297 elements like this:
10299 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10301 will be (with lowest address first, h = most-significant byte,
10302 l = least-significant byte of element):
10304 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10305 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10307 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10310 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10312 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10313 layout will result as if VSTM/VLDM were used. */
10316 output_move_neon (rtx *operands)
10318 rtx reg, mem, addr, ops[2];
10319 int regno, load = REG_P (operands[0]);
10320 const char *template;
10322 enum machine_mode mode;
10324 reg = operands[!load];
10325 mem = operands[load];
10327 mode = GET_MODE (reg);
10329 gcc_assert (REG_P (reg));
10330 regno = REGNO (reg);
10331 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10332 || NEON_REGNO_OK_FOR_QUAD (regno));
10333 gcc_assert (VALID_NEON_DREG_MODE (mode)
10334 || VALID_NEON_QREG_MODE (mode)
10335 || VALID_NEON_STRUCT_MODE (mode));
10336 gcc_assert (MEM_P (mem));
10338 addr = XEXP (mem, 0);
10340 /* Strip off const from addresses like (const (plus (...))). */
10341 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10342 addr = XEXP (addr, 0);
10344 switch (GET_CODE (addr))
10347 template = "v%smia%%?\t%%0!, %%h1";
10348 ops[0] = XEXP (addr, 0);
10353 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10354 gcc_unreachable ();
10359 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10362 for (i = 0; i < nregs; i++)
10364 /* We're only using DImode here because it's a convenient size. */
10365 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10366 ops[1] = adjust_address (mem, SImode, 8 * i);
10367 if (reg_overlap_mentioned_p (ops[0], mem))
10369 gcc_assert (overlap == -1);
10374 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10375 output_asm_insn (buff, ops);
10380 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10381 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10382 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10383 output_asm_insn (buff, ops);
10390 template = "v%smia%%?\t%%m0, %%h1";
10395 sprintf (buff, template, load ? "ld" : "st");
10396 output_asm_insn (buff, ops);
10401 /* Output an ADD r, s, #n where n may be too big for one instruction.
10402 If adding zero to one register, output nothing. */
10404 output_add_immediate (rtx *operands)
10406 HOST_WIDE_INT n = INTVAL (operands[2]);
10408 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10411 output_multi_immediate (operands,
10412 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10415 output_multi_immediate (operands,
10416 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10423 /* Output a multiple immediate operation.
10424 OPERANDS is the vector of operands referred to in the output patterns.
10425 INSTR1 is the output pattern to use for the first constant.
10426 INSTR2 is the output pattern to use for subsequent constants.
10427 IMMED_OP is the index of the constant slot in OPERANDS.
10428 N is the constant value. */
10429 static const char *
10430 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10431 int immed_op, HOST_WIDE_INT n)
10433 #if HOST_BITS_PER_WIDE_INT > 32
10439 /* Quick and easy output. */
10440 operands[immed_op] = const0_rtx;
10441 output_asm_insn (instr1, operands);
10446 const char * instr = instr1;
10448 /* Note that n is never zero here (which would give no output). */
10449 for (i = 0; i < 32; i += 2)
10453 operands[immed_op] = GEN_INT (n & (255 << i));
10454 output_asm_insn (instr, operands);
10464 /* Return the name of a shifter operation. */
10465 static const char *
10466 arm_shift_nmem(enum rtx_code code)
10471 return ARM_LSL_NAME;
10487 /* Return the appropriate ARM instruction for the operation code.
10488 The returned result should not be overwritten. OP is the rtx of the
10489 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10492 arithmetic_instr (rtx op, int shift_first_arg)
10494 switch (GET_CODE (op))
10500 return shift_first_arg ? "rsb" : "sub";
10515 return arm_shift_nmem(GET_CODE(op));
10518 gcc_unreachable ();
10522 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10523 for the operation code. The returned result should not be overwritten.
10524 OP is the rtx code of the shift.
10525 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
10527 static const char *
10528 shift_op (rtx op, HOST_WIDE_INT *amountp)
10531 enum rtx_code code = GET_CODE (op);
10533 switch (GET_CODE (XEXP (op, 1)))
10541 *amountp = INTVAL (XEXP (op, 1));
10545 gcc_unreachable ();
10551 gcc_assert (*amountp != -1);
10552 *amountp = 32 - *amountp;
10555 /* Fall through. */
10561 mnem = arm_shift_nmem(code);
10565 /* We never have to worry about the amount being other than a
10566 power of 2, since this case can never be reloaded from a reg. */
10567 gcc_assert (*amountp != -1);
10568 *amountp = int_log2 (*amountp);
10569 return ARM_LSL_NAME;
10572 gcc_unreachable ();
10575 if (*amountp != -1)
10577 /* This is not 100% correct, but follows from the desire to merge
10578 multiplication by a power of 2 with the recognizer for a
10579 shift. >=32 is not a valid shift for "lsl", so we must try and
10580 output a shift that produces the correct arithmetical result.
10581 Using lsr #32 is identical except for the fact that the carry bit
10582 is not set correctly if we set the flags; but we never use the
10583 carry bit from such an operation, so we can ignore that. */
10584 if (code == ROTATERT)
10585 /* Rotate is just modulo 32. */
10587 else if (*amountp != (*amountp & 31))
10589 if (code == ASHIFT)
10594 /* Shifts of 0 are no-ops. */
10602 /* Obtain the shift from the POWER of two. */
10604 static HOST_WIDE_INT
10605 int_log2 (HOST_WIDE_INT power)
10607 HOST_WIDE_INT shift = 0;
10609 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10611 gcc_assert (shift <= 31);
10618 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10619 because /bin/as is horribly restrictive. The judgement about
10620 whether or not each character is 'printable' (and can be output as
10621 is) or not (and must be printed with an octal escape) must be made
10622 with reference to the *host* character set -- the situation is
10623 similar to that discussed in the comments above pp_c_char in
10624 c-pretty-print.c. */
10626 #define MAX_ASCII_LEN 51
10629 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10632 int len_so_far = 0;
10634 fputs ("\t.ascii\t\"", stream);
10636 for (i = 0; i < len; i++)
10640 if (len_so_far >= MAX_ASCII_LEN)
10642 fputs ("\"\n\t.ascii\t\"", stream);
10648 if (c == '\\' || c == '\"')
10650 putc ('\\', stream);
10658 fprintf (stream, "\\%03o", c);
10663 fputs ("\"\n", stream);
10666 /* Compute the register save mask for registers 0 through 12
10667 inclusive. This code is used by arm_compute_save_reg_mask. */
10669 static unsigned long
10670 arm_compute_save_reg0_reg12_mask (void)
10672 unsigned long func_type = arm_current_func_type ();
10673 unsigned long save_reg_mask = 0;
10676 if (IS_INTERRUPT (func_type))
10678 unsigned int max_reg;
10679 /* Interrupt functions must not corrupt any registers,
10680 even call clobbered ones. If this is a leaf function
10681 we can just examine the registers used by the RTL, but
10682 otherwise we have to assume that whatever function is
10683 called might clobber anything, and so we have to save
10684 all the call-clobbered registers as well. */
10685 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10686 /* FIQ handlers have registers r8 - r12 banked, so
10687 we only need to check r0 - r7, Normal ISRs only
10688 bank r14 and r15, so we must check up to r12.
10689 r13 is the stack pointer which is always preserved,
10690 so we do not need to consider it here. */
10695 for (reg = 0; reg <= max_reg; reg++)
10696 if (df_regs_ever_live_p (reg)
10697 || (! current_function_is_leaf && call_used_regs[reg]))
10698 save_reg_mask |= (1 << reg);
10700 /* Also save the pic base register if necessary. */
10702 && !TARGET_SINGLE_PIC_BASE
10703 && arm_pic_register != INVALID_REGNUM
10704 && current_function_uses_pic_offset_table)
10705 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10709 /* In the normal case we only need to save those registers
10710 which are call saved and which are used by this function. */
10711 for (reg = 0; reg <= 11; reg++)
10712 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10713 save_reg_mask |= (1 << reg);
10715 /* Handle the frame pointer as a special case. */
10716 if (frame_pointer_needed)
10717 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10719 /* If we aren't loading the PIC register,
10720 don't stack it even though it may be live. */
10722 && !TARGET_SINGLE_PIC_BASE
10723 && arm_pic_register != INVALID_REGNUM
10724 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10725 || current_function_uses_pic_offset_table))
10726 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10728 /* The prologue will copy SP into R0, so save it. */
10729 if (IS_STACKALIGN (func_type))
10730 save_reg_mask |= 1;
10733 /* Save registers so the exception handler can modify them. */
10734 if (current_function_calls_eh_return)
10740 reg = EH_RETURN_DATA_REGNO (i);
10741 if (reg == INVALID_REGNUM)
10743 save_reg_mask |= 1 << reg;
10747 return save_reg_mask;
10751 /* Compute a bit mask of which registers need to be
10752 saved on the stack for the current function.
10753 This is used by arm_get_frame_offsets, which may add extra registers. */
10755 static unsigned long
10756 arm_compute_save_reg_mask (void)
10758 unsigned int save_reg_mask = 0;
10759 unsigned long func_type = arm_current_func_type ();
10762 if (IS_NAKED (func_type))
10763 /* This should never really happen. */
10766 /* If we are creating a stack frame, then we must save the frame pointer,
10767 IP (which will hold the old stack pointer), LR and the PC. */
10768 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
10770 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10773 | (1 << PC_REGNUM);
10775 /* Volatile functions do not return, so there
10776 is no need to save any other registers. */
10777 if (IS_VOLATILE (func_type))
10778 return save_reg_mask;
10780 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10782 /* Decide if we need to save the link register.
10783 Interrupt routines have their own banked link register,
10784 so they never need to save it.
10785 Otherwise if we do not use the link register we do not need to save
10786 it. If we are pushing other registers onto the stack however, we
10787 can save an instruction in the epilogue by pushing the link register
10788 now and then popping it back into the PC. This incurs extra memory
10789 accesses though, so we only do it when optimizing for size, and only
10790 if we know that we will not need a fancy return sequence. */
10791 if (df_regs_ever_live_p (LR_REGNUM)
10794 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10795 && !current_function_calls_eh_return))
10796 save_reg_mask |= 1 << LR_REGNUM;
10798 if (cfun->machine->lr_save_eliminated)
10799 save_reg_mask &= ~ (1 << LR_REGNUM);
10801 if (TARGET_REALLY_IWMMXT
10802 && ((bit_count (save_reg_mask)
10803 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
10805 /* The total number of registers that are going to be pushed
10806 onto the stack is odd. We need to ensure that the stack
10807 is 64-bit aligned before we start to save iWMMXt registers,
10808 and also before we start to create locals. (A local variable
10809 might be a double or long long which we will load/store using
10810 an iWMMXt instruction). Therefore we need to push another
10811 ARM register, so that the stack will be 64-bit aligned. We
10812 try to avoid using the arg registers (r0 -r3) as they might be
10813 used to pass values in a tail call. */
10814 for (reg = 4; reg <= 12; reg++)
10815 if ((save_reg_mask & (1 << reg)) == 0)
10819 save_reg_mask |= (1 << reg);
10822 cfun->machine->sibcall_blocked = 1;
10823 save_reg_mask |= (1 << 3);
10827 /* We may need to push an additional register for use initializing the
10828 PIC base register. */
10829 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10830 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10832 reg = thumb_find_work_register (1 << 4);
10833 if (!call_used_regs[reg])
10834 save_reg_mask |= (1 << reg);
10837 return save_reg_mask;
10841 /* Compute a bit mask of which registers need to be
10842 saved on the stack for the current function. */
10843 static unsigned long
10844 thumb1_compute_save_reg_mask (void)
10846 unsigned long mask;
10850 for (reg = 0; reg < 12; reg ++)
10851 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10855 && !TARGET_SINGLE_PIC_BASE
10856 && arm_pic_register != INVALID_REGNUM
10857 && current_function_uses_pic_offset_table)
10858 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10860 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10861 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10862 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10864 /* LR will also be pushed if any lo regs are pushed. */
10865 if (mask & 0xff || thumb_force_lr_save ())
10866 mask |= (1 << LR_REGNUM);
10868 /* Make sure we have a low work register if we need one.
10869 We will need one if we are going to push a high register,
10870 but we are not currently intending to push a low register. */
10871 if ((mask & 0xff) == 0
10872 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10874 /* Use thumb_find_work_register to choose which register
10875 we will use. If the register is live then we will
10876 have to push it. Use LAST_LO_REGNUM as our fallback
10877 choice for the register to select. */
10878 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10879 /* Make sure the register returned by thumb_find_work_register is
10880 not part of the return value. */
10881 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
10882 reg = LAST_LO_REGNUM;
10884 if (! call_used_regs[reg])
10892 /* Return the number of bytes required to save VFP registers. */
10894 arm_get_vfp_saved_size (void)
10896 unsigned int regno;
10901 /* Space for saved VFP registers. */
10902 if (TARGET_HARD_FLOAT && TARGET_VFP)
10905 for (regno = FIRST_VFP_REGNUM;
10906 regno < LAST_VFP_REGNUM;
10909 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10910 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10914 /* Workaround ARM10 VFPr1 bug. */
10915 if (count == 2 && !arm_arch6)
10917 saved += count * 8;
10926 if (count == 2 && !arm_arch6)
10928 saved += count * 8;
10935 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10936 everything bar the final return instruction. */
10938 output_return_instruction (rtx operand, int really_return, int reverse)
10940 char conditional[10];
10943 unsigned long live_regs_mask;
10944 unsigned long func_type;
10945 arm_stack_offsets *offsets;
10947 func_type = arm_current_func_type ();
10949 if (IS_NAKED (func_type))
10952 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10954 /* If this function was declared non-returning, and we have
10955 found a tail call, then we have to trust that the called
10956 function won't return. */
10961 /* Otherwise, trap an attempted return by aborting. */
10963 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
10965 assemble_external_libcall (ops[1]);
10966 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
10972 gcc_assert (!current_function_calls_alloca || really_return);
10974 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
10976 return_used_this_function = 1;
10978 offsets = arm_get_frame_offsets ();
10979 live_regs_mask = offsets->saved_regs_mask;
10981 if (live_regs_mask)
10983 const char * return_reg;
10985 /* If we do not have any special requirements for function exit
10986 (e.g. interworking) then we can load the return address
10987 directly into the PC. Otherwise we must load it into LR. */
10989 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
10990 return_reg = reg_names[PC_REGNUM];
10992 return_reg = reg_names[LR_REGNUM];
10994 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
10996 /* There are three possible reasons for the IP register
10997 being saved. 1) a stack frame was created, in which case
10998 IP contains the old stack pointer, or 2) an ISR routine
10999 corrupted it, or 3) it was saved to align the stack on
11000 iWMMXt. In case 1, restore IP into SP, otherwise just
11002 if (frame_pointer_needed)
11004 live_regs_mask &= ~ (1 << IP_REGNUM);
11005 live_regs_mask |= (1 << SP_REGNUM);
11008 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11011 /* On some ARM architectures it is faster to use LDR rather than
11012 LDM to load a single register. On other architectures, the
11013 cost is the same. In 26 bit mode, or for exception handlers,
11014 we have to use LDM to load the PC so that the CPSR is also
11016 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11017 if (live_regs_mask == (1U << reg))
11020 if (reg <= LAST_ARM_REGNUM
11021 && (reg != LR_REGNUM
11023 || ! IS_INTERRUPT (func_type)))
11025 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11026 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11033 /* Generate the load multiple instruction to restore the
11034 registers. Note we can get here, even if
11035 frame_pointer_needed is true, but only if sp already
11036 points to the base of the saved core registers. */
11037 if (live_regs_mask & (1 << SP_REGNUM))
11039 unsigned HOST_WIDE_INT stack_adjust;
11041 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11042 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11044 if (stack_adjust && arm_arch5 && TARGET_ARM)
11045 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11048 /* If we can't use ldmib (SA110 bug),
11049 then try to pop r3 instead. */
11051 live_regs_mask |= 1 << 3;
11052 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11056 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11058 p = instr + strlen (instr);
11060 for (reg = 0; reg <= SP_REGNUM; reg++)
11061 if (live_regs_mask & (1 << reg))
11063 int l = strlen (reg_names[reg]);
11069 memcpy (p, ", ", 2);
11073 memcpy (p, "%|", 2);
11074 memcpy (p + 2, reg_names[reg], l);
11078 if (live_regs_mask & (1 << LR_REGNUM))
11080 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11081 /* If returning from an interrupt, restore the CPSR. */
11082 if (IS_INTERRUPT (func_type))
11089 output_asm_insn (instr, & operand);
11091 /* See if we need to generate an extra instruction to
11092 perform the actual function return. */
11094 && func_type != ARM_FT_INTERWORKED
11095 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11097 /* The return has already been handled
11098 by loading the LR into the PC. */
11105 switch ((int) ARM_FUNC_TYPE (func_type))
11109 /* ??? This is wrong for unified assembly syntax. */
11110 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11113 case ARM_FT_INTERWORKED:
11114 sprintf (instr, "bx%s\t%%|lr", conditional);
11117 case ARM_FT_EXCEPTION:
11118 /* ??? This is wrong for unified assembly syntax. */
11119 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11123 /* Use bx if it's available. */
11124 if (arm_arch5 || arm_arch4t)
11125 sprintf (instr, "bx%s\t%%|lr", conditional);
11127 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11131 output_asm_insn (instr, & operand);
11137 /* Write the function name into the code section, directly preceding
11138 the function prologue.
11140 Code will be output similar to this:
11142 .ascii "arm_poke_function_name", 0
11145 .word 0xff000000 + (t1 - t0)
11146 arm_poke_function_name
11148 stmfd sp!, {fp, ip, lr, pc}
11151 When performing a stack backtrace, code can inspect the value
11152 of 'pc' stored at 'fp' + 0. If the trace function then looks
11153 at location pc - 12 and the top 8 bits are set, then we know
11154 that there is a function name embedded immediately preceding this
11155 location and has length ((pc[-3]) & 0xff000000).
11157 We assume that pc is declared as a pointer to an unsigned long.
11159 It is of no benefit to output the function name if we are assembling
11160 a leaf function. These function types will not contain a stack
11161 backtrace structure, therefore it is not possible to determine the
11164 arm_poke_function_name (FILE *stream, const char *name)
11166 unsigned long alignlength;
11167 unsigned long length;
11170 length = strlen (name) + 1;
11171 alignlength = ROUND_UP_WORD (length);
11173 ASM_OUTPUT_ASCII (stream, name, length);
11174 ASM_OUTPUT_ALIGN (stream, 2);
11175 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11176 assemble_aligned_integer (UNITS_PER_WORD, x);
11179 /* Place some comments into the assembler stream
11180 describing the current function. */
11182 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11184 unsigned long func_type;
11188 thumb1_output_function_prologue (f, frame_size);
11192 /* Sanity check. */
11193 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11195 func_type = arm_current_func_type ();
11197 switch ((int) ARM_FUNC_TYPE (func_type))
11200 case ARM_FT_NORMAL:
11202 case ARM_FT_INTERWORKED:
11203 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11206 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11209 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11211 case ARM_FT_EXCEPTION:
11212 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11216 if (IS_NAKED (func_type))
11217 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11219 if (IS_VOLATILE (func_type))
11220 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11222 if (IS_NESTED (func_type))
11223 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11224 if (IS_STACKALIGN (func_type))
11225 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11227 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11228 current_function_args_size,
11229 current_function_pretend_args_size, frame_size);
11231 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11232 frame_pointer_needed,
11233 cfun->machine->uses_anonymous_args);
11235 if (cfun->machine->lr_save_eliminated)
11236 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11238 if (current_function_calls_eh_return)
11239 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11241 return_used_this_function = 0;
11245 arm_output_epilogue (rtx sibling)
11248 unsigned long saved_regs_mask;
11249 unsigned long func_type;
11250 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11251 frame that is $fp + 4 for a non-variadic function. */
11252 int floats_offset = 0;
11254 FILE * f = asm_out_file;
11255 unsigned int lrm_count = 0;
11256 int really_return = (sibling == NULL);
11258 arm_stack_offsets *offsets;
11260 /* If we have already generated the return instruction
11261 then it is futile to generate anything else. */
11262 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11265 func_type = arm_current_func_type ();
11267 if (IS_NAKED (func_type))
11268 /* Naked functions don't have epilogues. */
11271 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11275 /* A volatile function should never return. Call abort. */
11276 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11277 assemble_external_libcall (op);
11278 output_asm_insn ("bl\t%a0", &op);
11283 /* If we are throwing an exception, then we really must be doing a
11284 return, so we can't tail-call. */
11285 gcc_assert (!current_function_calls_eh_return || really_return);
11287 offsets = arm_get_frame_offsets ();
11288 saved_regs_mask = offsets->saved_regs_mask;
11291 lrm_count = bit_count (saved_regs_mask);
11293 floats_offset = offsets->saved_args;
11294 /* Compute how far away the floats will be. */
11295 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11296 if (saved_regs_mask & (1 << reg))
11297 floats_offset += 4;
11299 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11301 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11302 int vfp_offset = offsets->frame;
11304 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11306 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11307 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11309 floats_offset += 12;
11310 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11311 reg, FP_REGNUM, floats_offset - vfp_offset);
11316 start_reg = LAST_FPA_REGNUM;
11318 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11320 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11322 floats_offset += 12;
11324 /* We can't unstack more than four registers at once. */
11325 if (start_reg - reg == 3)
11327 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11328 reg, FP_REGNUM, floats_offset - vfp_offset);
11329 start_reg = reg - 1;
11334 if (reg != start_reg)
11335 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11336 reg + 1, start_reg - reg,
11337 FP_REGNUM, floats_offset - vfp_offset);
11338 start_reg = reg - 1;
11342 /* Just in case the last register checked also needs unstacking. */
11343 if (reg != start_reg)
11344 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11345 reg + 1, start_reg - reg,
11346 FP_REGNUM, floats_offset - vfp_offset);
11349 if (TARGET_HARD_FLOAT && TARGET_VFP)
11353 /* The fldmd insns do not have base+offset addressing
11354 modes, so we use IP to hold the address. */
11355 saved_size = arm_get_vfp_saved_size ();
11357 if (saved_size > 0)
11359 floats_offset += saved_size;
11360 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11361 FP_REGNUM, floats_offset - vfp_offset);
11363 start_reg = FIRST_VFP_REGNUM;
11364 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11366 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11367 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11369 if (start_reg != reg)
11370 vfp_output_fldmd (f, IP_REGNUM,
11371 (start_reg - FIRST_VFP_REGNUM) / 2,
11372 (reg - start_reg) / 2);
11373 start_reg = reg + 2;
11376 if (start_reg != reg)
11377 vfp_output_fldmd (f, IP_REGNUM,
11378 (start_reg - FIRST_VFP_REGNUM) / 2,
11379 (reg - start_reg) / 2);
11384 /* The frame pointer is guaranteed to be non-double-word aligned.
11385 This is because it is set to (old_stack_pointer - 4) and the
11386 old_stack_pointer was double word aligned. Thus the offset to
11387 the iWMMXt registers to be loaded must also be non-double-word
11388 sized, so that the resultant address *is* double-word aligned.
11389 We can ignore floats_offset since that was already included in
11390 the live_regs_mask. */
11391 lrm_count += (lrm_count % 2 ? 2 : 1);
11393 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11394 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11396 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11397 reg, FP_REGNUM, lrm_count * 4);
11402 /* saved_regs_mask should contain the IP, which at the time of stack
11403 frame generation actually contains the old stack pointer. So a
11404 quick way to unwind the stack is just pop the IP register directly
11405 into the stack pointer. */
11406 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11407 saved_regs_mask &= ~ (1 << IP_REGNUM);
11408 saved_regs_mask |= (1 << SP_REGNUM);
11410 /* There are two registers left in saved_regs_mask - LR and PC. We
11411 only need to restore the LR register (the return address), but to
11412 save time we can load it directly into the PC, unless we need a
11413 special function exit sequence, or we are not really returning. */
11415 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11416 && !current_function_calls_eh_return)
11417 /* Delete the LR from the register mask, so that the LR on
11418 the stack is loaded into the PC in the register mask. */
11419 saved_regs_mask &= ~ (1 << LR_REGNUM);
11421 saved_regs_mask &= ~ (1 << PC_REGNUM);
11423 /* We must use SP as the base register, because SP is one of the
11424 registers being restored. If an interrupt or page fault
11425 happens in the ldm instruction, the SP might or might not
11426 have been restored. That would be bad, as then SP will no
11427 longer indicate the safe area of stack, and we can get stack
11428 corruption. Using SP as the base register means that it will
11429 be reset correctly to the original value, should an interrupt
11430 occur. If the stack pointer already points at the right
11431 place, then omit the subtraction. */
11432 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11433 || current_function_calls_alloca)
11434 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11435 4 * bit_count (saved_regs_mask));
11436 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11438 if (IS_INTERRUPT (func_type))
11439 /* Interrupt handlers will have pushed the
11440 IP onto the stack, so restore it now. */
11441 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11445 /* This branch is executed for ARM mode (non-apcs frames) and
11446 Thumb-2 mode. Frame layout is essentially the same for those
11447 cases, except that in ARM mode frame pointer points to the
11448 first saved register, while in Thumb-2 mode the frame pointer points
11449 to the last saved register.
11451 It is possible to make frame pointer point to last saved
11452 register in both cases, and remove some conditionals below.
11453 That means that fp setup in prologue would be just "mov fp, sp"
11454 and sp restore in epilogue would be just "mov sp, fp", whereas
11455 now we have to use add/sub in those cases. However, the value
11456 of that would be marginal, as both mov and add/sub are 32-bit
11457 in ARM mode, and it would require extra conditionals
11458 in arm_expand_prologue to distingish ARM-apcs-frame case
11459 (where frame pointer is required to point at first register)
11460 and ARM-non-apcs-frame. Therefore, such change is postponed
11461 until real need arise. */
11462 HOST_WIDE_INT amount;
11464 /* Restore stack pointer if necessary. */
11465 if (TARGET_ARM && frame_pointer_needed)
11467 operands[0] = stack_pointer_rtx;
11468 operands[1] = hard_frame_pointer_rtx;
11470 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
11471 output_add_immediate (operands);
11475 if (frame_pointer_needed)
11477 /* For Thumb-2 restore sp from the frame pointer.
11478 Operand restrictions mean we have to incrememnt FP, then copy
11480 amount = offsets->locals_base - offsets->saved_regs;
11481 operands[0] = hard_frame_pointer_rtx;
11485 unsigned long count;
11486 operands[0] = stack_pointer_rtx;
11487 amount = offsets->outgoing_args - offsets->saved_regs;
11488 /* pop call clobbered registers if it avoids a
11489 separate stack adjustment. */
11490 count = offsets->saved_regs - offsets->saved_args;
11493 && !current_function_calls_eh_return
11494 && bit_count(saved_regs_mask) * 4 == count
11495 && !IS_INTERRUPT (func_type)
11496 && !cfun->tail_call_emit)
11498 unsigned long mask;
11499 mask = (1 << (arm_size_return_regs() / 4)) - 1;
11501 mask &= ~saved_regs_mask;
11503 while (bit_count (mask) * 4 > amount)
11505 while ((mask & (1 << reg)) == 0)
11507 mask &= ~(1 << reg);
11509 if (bit_count (mask) * 4 == amount) {
11511 saved_regs_mask |= mask;
11518 operands[1] = operands[0];
11519 operands[2] = GEN_INT (amount);
11520 output_add_immediate (operands);
11522 if (frame_pointer_needed)
11523 asm_fprintf (f, "\tmov\t%r, %r\n",
11524 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11527 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11529 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11530 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11531 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11536 start_reg = FIRST_FPA_REGNUM;
11538 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11540 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11542 if (reg - start_reg == 3)
11544 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11545 start_reg, SP_REGNUM);
11546 start_reg = reg + 1;
11551 if (reg != start_reg)
11552 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11553 start_reg, reg - start_reg,
11556 start_reg = reg + 1;
11560 /* Just in case the last register checked also needs unstacking. */
11561 if (reg != start_reg)
11562 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11563 start_reg, reg - start_reg, SP_REGNUM);
11566 if (TARGET_HARD_FLOAT && TARGET_VFP)
11568 start_reg = FIRST_VFP_REGNUM;
11569 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11571 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11572 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11574 if (start_reg != reg)
11575 vfp_output_fldmd (f, SP_REGNUM,
11576 (start_reg - FIRST_VFP_REGNUM) / 2,
11577 (reg - start_reg) / 2);
11578 start_reg = reg + 2;
11581 if (start_reg != reg)
11582 vfp_output_fldmd (f, SP_REGNUM,
11583 (start_reg - FIRST_VFP_REGNUM) / 2,
11584 (reg - start_reg) / 2);
11587 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11588 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11589 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11591 /* If we can, restore the LR into the PC. */
11592 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11593 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11594 && !IS_STACKALIGN (func_type)
11596 && current_function_pretend_args_size == 0
11597 && saved_regs_mask & (1 << LR_REGNUM)
11598 && !current_function_calls_eh_return)
11600 saved_regs_mask &= ~ (1 << LR_REGNUM);
11601 saved_regs_mask |= (1 << PC_REGNUM);
11602 rfe = IS_INTERRUPT (func_type);
11607 /* Load the registers off the stack. If we only have one register
11608 to load use the LDR instruction - it is faster. For Thumb-2
11609 always use pop and the assembler will pick the best instruction.*/
11610 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11611 && !IS_INTERRUPT(func_type))
11613 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11615 else if (saved_regs_mask)
11617 if (saved_regs_mask & (1 << SP_REGNUM))
11618 /* Note - write back to the stack register is not enabled
11619 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11620 in the list of registers and if we add writeback the
11621 instruction becomes UNPREDICTABLE. */
11622 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11624 else if (TARGET_ARM)
11625 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11628 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11631 if (current_function_pretend_args_size)
11633 /* Unwind the pre-pushed regs. */
11634 operands[0] = operands[1] = stack_pointer_rtx;
11635 operands[2] = GEN_INT (current_function_pretend_args_size);
11636 output_add_immediate (operands);
11640 /* We may have already restored PC directly from the stack. */
11641 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11644 /* Stack adjustment for exception handler. */
11645 if (current_function_calls_eh_return)
11646 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11647 ARM_EH_STACKADJ_REGNUM);
11649 /* Generate the return instruction. */
11650 switch ((int) ARM_FUNC_TYPE (func_type))
11654 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11657 case ARM_FT_EXCEPTION:
11658 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11661 case ARM_FT_INTERWORKED:
11662 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11666 if (IS_STACKALIGN (func_type))
11668 /* See comment in arm_expand_prologue. */
11669 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11671 if (arm_arch5 || arm_arch4t)
11672 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11674 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11682 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11683 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11685 arm_stack_offsets *offsets;
11691 /* Emit any call-via-reg trampolines that are needed for v4t support
11692 of call_reg and call_value_reg type insns. */
11693 for (regno = 0; regno < LR_REGNUM; regno++)
11695 rtx label = cfun->machine->call_via[regno];
11699 switch_to_section (function_section (current_function_decl));
11700 targetm.asm_out.internal_label (asm_out_file, "L",
11701 CODE_LABEL_NUMBER (label));
11702 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11706 /* ??? Probably not safe to set this here, since it assumes that a
11707 function will be emitted as assembly immediately after we generate
11708 RTL for it. This does not happen for inline functions. */
11709 return_used_this_function = 0;
11711 else /* TARGET_32BIT */
11713 /* We need to take into account any stack-frame rounding. */
11714 offsets = arm_get_frame_offsets ();
11716 gcc_assert (!use_return_insn (FALSE, NULL)
11717 || !return_used_this_function
11718 || offsets->saved_regs == offsets->outgoing_args
11719 || frame_pointer_needed);
11721 /* Reset the ARM-specific per-function variables. */
11722 after_arm_reorg = 0;
11726 /* Generate and emit an insn that we will recognize as a push_multi.
11727 Unfortunately, since this insn does not reflect very well the actual
11728 semantics of the operation, we need to annotate the insn for the benefit
11729 of DWARF2 frame unwind information. */
11731 emit_multi_reg_push (unsigned long mask)
11734 int num_dwarf_regs;
11738 int dwarf_par_index;
11741 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11742 if (mask & (1 << i))
11745 gcc_assert (num_regs && num_regs <= 16);
11747 /* We don't record the PC in the dwarf frame information. */
11748 num_dwarf_regs = num_regs;
11749 if (mask & (1 << PC_REGNUM))
11752 /* For the body of the insn we are going to generate an UNSPEC in
11753 parallel with several USEs. This allows the insn to be recognized
11754 by the push_multi pattern in the arm.md file. The insn looks
11755 something like this:
11758 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11759 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11760 (use (reg:SI 11 fp))
11761 (use (reg:SI 12 ip))
11762 (use (reg:SI 14 lr))
11763 (use (reg:SI 15 pc))
11766 For the frame note however, we try to be more explicit and actually
11767 show each register being stored into the stack frame, plus a (single)
11768 decrement of the stack pointer. We do it this way in order to be
11769 friendly to the stack unwinding code, which only wants to see a single
11770 stack decrement per instruction. The RTL we generate for the note looks
11771 something like this:
11774 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11775 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11776 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11777 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11778 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11781 This sequence is used both by the code to support stack unwinding for
11782 exceptions handlers and the code to generate dwarf2 frame debugging. */
11784 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11785 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11786 dwarf_par_index = 1;
11788 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11790 if (mask & (1 << i))
11792 reg = gen_rtx_REG (SImode, i);
11794 XVECEXP (par, 0, 0)
11795 = gen_rtx_SET (VOIDmode,
11796 gen_frame_mem (BLKmode,
11797 gen_rtx_PRE_DEC (BLKmode,
11798 stack_pointer_rtx)),
11799 gen_rtx_UNSPEC (BLKmode,
11800 gen_rtvec (1, reg),
11801 UNSPEC_PUSH_MULT));
11803 if (i != PC_REGNUM)
11805 tmp = gen_rtx_SET (VOIDmode,
11806 gen_frame_mem (SImode, stack_pointer_rtx),
11808 RTX_FRAME_RELATED_P (tmp) = 1;
11809 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11817 for (j = 1, i++; j < num_regs; i++)
11819 if (mask & (1 << i))
11821 reg = gen_rtx_REG (SImode, i);
11823 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11825 if (i != PC_REGNUM)
11828 = gen_rtx_SET (VOIDmode,
11829 gen_frame_mem (SImode,
11830 plus_constant (stack_pointer_rtx,
11833 RTX_FRAME_RELATED_P (tmp) = 1;
11834 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11841 par = emit_insn (par);
11843 tmp = gen_rtx_SET (VOIDmode,
11845 plus_constant (stack_pointer_rtx, -4 * num_regs));
11846 RTX_FRAME_RELATED_P (tmp) = 1;
11847 XVECEXP (dwarf, 0, 0) = tmp;
11849 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11854 /* Calculate the size of the return value that is passed in registers. */
11856 arm_size_return_regs (void)
11858 enum machine_mode mode;
11860 if (current_function_return_rtx != 0)
11861 mode = GET_MODE (current_function_return_rtx);
11863 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11865 return GET_MODE_SIZE (mode);
11869 emit_sfm (int base_reg, int count)
11876 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11877 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11879 reg = gen_rtx_REG (XFmode, base_reg++);
11881 XVECEXP (par, 0, 0)
11882 = gen_rtx_SET (VOIDmode,
11883 gen_frame_mem (BLKmode,
11884 gen_rtx_PRE_DEC (BLKmode,
11885 stack_pointer_rtx)),
11886 gen_rtx_UNSPEC (BLKmode,
11887 gen_rtvec (1, reg),
11888 UNSPEC_PUSH_MULT));
11889 tmp = gen_rtx_SET (VOIDmode,
11890 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11891 RTX_FRAME_RELATED_P (tmp) = 1;
11892 XVECEXP (dwarf, 0, 1) = tmp;
11894 for (i = 1; i < count; i++)
11896 reg = gen_rtx_REG (XFmode, base_reg++);
11897 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11899 tmp = gen_rtx_SET (VOIDmode,
11900 gen_frame_mem (XFmode,
11901 plus_constant (stack_pointer_rtx,
11904 RTX_FRAME_RELATED_P (tmp) = 1;
11905 XVECEXP (dwarf, 0, i + 1) = tmp;
11908 tmp = gen_rtx_SET (VOIDmode,
11910 plus_constant (stack_pointer_rtx, -12 * count));
11912 RTX_FRAME_RELATED_P (tmp) = 1;
11913 XVECEXP (dwarf, 0, 0) = tmp;
11915 par = emit_insn (par);
11916 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11922 /* Return true if the current function needs to save/restore LR. */
11925 thumb_force_lr_save (void)
11927 return !cfun->machine->lr_save_eliminated
11928 && (!leaf_function_p ()
11929 || thumb_far_jump_used_p ()
11930 || df_regs_ever_live_p (LR_REGNUM));
11934 /* Compute the distance from register FROM to register TO.
11935 These can be the arg pointer (26), the soft frame pointer (25),
11936 the stack pointer (13) or the hard frame pointer (11).
11937 In thumb mode r7 is used as the soft frame pointer, if needed.
11938 Typical stack layout looks like this:
11940 old stack pointer -> | |
11943 | | saved arguments for
11944 | | vararg functions
11947 hard FP & arg pointer -> | | \
11955 soft frame pointer -> | | /
11960 locals base pointer -> | | /
11965 current stack pointer -> | | /
11968 For a given function some or all of these stack components
11969 may not be needed, giving rise to the possibility of
11970 eliminating some of the registers.
11972 The values returned by this function must reflect the behavior
11973 of arm_expand_prologue() and arm_compute_save_reg_mask().
11975 The sign of the number returned reflects the direction of stack
11976 growth, so the values are positive for all eliminations except
11977 from the soft frame pointer to the hard frame pointer.
11979 SFP may point just inside the local variables block to ensure correct
11983 /* Calculate stack offsets. These are used to calculate register elimination
11984 offsets and in prologue/epilogue code. Also calculates which registers
11985 should be saved. */
11987 static arm_stack_offsets *
11988 arm_get_frame_offsets (void)
11990 struct arm_stack_offsets *offsets;
11991 unsigned long func_type;
11995 HOST_WIDE_INT frame_size;
11998 offsets = &cfun->machine->stack_offsets;
12000 /* We need to know if we are a leaf function. Unfortunately, it
12001 is possible to be called after start_sequence has been called,
12002 which causes get_insns to return the insns for the sequence,
12003 not the function, which will cause leaf_function_p to return
12004 the incorrect result.
12006 to know about leaf functions once reload has completed, and the
12007 frame size cannot be changed after that time, so we can safely
12008 use the cached value. */
12010 if (reload_completed)
12013 /* Initially this is the size of the local variables. It will translated
12014 into an offset once we have determined the size of preceding data. */
12015 frame_size = ROUND_UP_WORD (get_frame_size ());
12017 leaf = leaf_function_p ();
12019 /* Space for variadic functions. */
12020 offsets->saved_args = current_function_pretend_args_size;
12022 /* In Thumb mode this is incorrect, but never used. */
12023 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
12027 unsigned int regno;
12029 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12030 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12031 saved = core_saved;
12033 /* We know that SP will be doubleword aligned on entry, and we must
12034 preserve that condition at any subroutine call. We also require the
12035 soft frame pointer to be doubleword aligned. */
12037 if (TARGET_REALLY_IWMMXT)
12039 /* Check for the call-saved iWMMXt registers. */
12040 for (regno = FIRST_IWMMXT_REGNUM;
12041 regno <= LAST_IWMMXT_REGNUM;
12043 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12047 func_type = arm_current_func_type ();
12048 if (! IS_VOLATILE (func_type))
12050 /* Space for saved FPA registers. */
12051 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12052 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12055 /* Space for saved VFP registers. */
12056 if (TARGET_HARD_FLOAT && TARGET_VFP)
12057 saved += arm_get_vfp_saved_size ();
12060 else /* TARGET_THUMB1 */
12062 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12063 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12064 saved = core_saved;
12065 if (TARGET_BACKTRACE)
12069 /* Saved registers include the stack frame. */
12070 offsets->saved_regs = offsets->saved_args + saved;
12071 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12072 /* A leaf function does not need any stack alignment if it has nothing
12074 if (leaf && frame_size == 0)
12076 offsets->outgoing_args = offsets->soft_frame;
12077 offsets->locals_base = offsets->soft_frame;
12081 /* Ensure SFP has the correct alignment. */
12082 if (ARM_DOUBLEWORD_ALIGN
12083 && (offsets->soft_frame & 7))
12085 offsets->soft_frame += 4;
12086 /* Try to align stack by pushing an extra reg. Don't bother doing this
12087 when there is a stack frame as the alignment will be rolled into
12088 the normal stack adjustment. */
12089 if (frame_size + current_function_outgoing_args_size == 0)
12093 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12095 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12102 if (reg == -1 && arm_size_return_regs () <= 12
12103 && !cfun->tail_call_emit)
12105 /* Push/pop an argument register (r3) if all callee saved
12106 registers are already being pushed. */
12112 offsets->saved_regs += 4;
12113 offsets->saved_regs_mask |= (1 << reg);
12118 offsets->locals_base = offsets->soft_frame + frame_size;
12119 offsets->outgoing_args = (offsets->locals_base
12120 + current_function_outgoing_args_size);
12122 if (ARM_DOUBLEWORD_ALIGN)
12124 /* Ensure SP remains doubleword aligned. */
12125 if (offsets->outgoing_args & 7)
12126 offsets->outgoing_args += 4;
12127 gcc_assert (!(offsets->outgoing_args & 7));
12134 /* Calculate the relative offsets for the different stack pointers. Positive
12135 offsets are in the direction of stack growth. */
12138 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12140 arm_stack_offsets *offsets;
12142 offsets = arm_get_frame_offsets ();
12144 /* OK, now we have enough information to compute the distances.
12145 There must be an entry in these switch tables for each pair
12146 of registers in ELIMINABLE_REGS, even if some of the entries
12147 seem to be redundant or useless. */
12150 case ARG_POINTER_REGNUM:
12153 case THUMB_HARD_FRAME_POINTER_REGNUM:
12156 case FRAME_POINTER_REGNUM:
12157 /* This is the reverse of the soft frame pointer
12158 to hard frame pointer elimination below. */
12159 return offsets->soft_frame - offsets->saved_args;
12161 case ARM_HARD_FRAME_POINTER_REGNUM:
12162 /* If there is no stack frame then the hard
12163 frame pointer and the arg pointer coincide. */
12164 if (offsets->frame == offsets->saved_regs)
12166 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12167 return (frame_pointer_needed
12168 && cfun->static_chain_decl != NULL
12169 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12171 case STACK_POINTER_REGNUM:
12172 /* If nothing has been pushed on the stack at all
12173 then this will return -4. This *is* correct! */
12174 return offsets->outgoing_args - (offsets->saved_args + 4);
12177 gcc_unreachable ();
12179 gcc_unreachable ();
12181 case FRAME_POINTER_REGNUM:
12184 case THUMB_HARD_FRAME_POINTER_REGNUM:
12187 case ARM_HARD_FRAME_POINTER_REGNUM:
12188 /* The hard frame pointer points to the top entry in the
12189 stack frame. The soft frame pointer to the bottom entry
12190 in the stack frame. If there is no stack frame at all,
12191 then they are identical. */
12193 return offsets->frame - offsets->soft_frame;
12195 case STACK_POINTER_REGNUM:
12196 return offsets->outgoing_args - offsets->soft_frame;
12199 gcc_unreachable ();
12201 gcc_unreachable ();
12204 /* You cannot eliminate from the stack pointer.
12205 In theory you could eliminate from the hard frame
12206 pointer to the stack pointer, but this will never
12207 happen, since if a stack frame is not needed the
12208 hard frame pointer will never be used. */
12209 gcc_unreachable ();
12214 /* Emit RTL to save coprocessor registers on function entry. Returns the
12215 number of bytes pushed. */
12218 arm_save_coproc_regs(void)
12220 int saved_size = 0;
12222 unsigned start_reg;
12225 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12226 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12228 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12229 insn = gen_rtx_MEM (V2SImode, insn);
12230 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12231 RTX_FRAME_RELATED_P (insn) = 1;
12235 /* Save any floating point call-saved registers used by this
12237 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12239 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12240 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12242 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12243 insn = gen_rtx_MEM (XFmode, insn);
12244 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12245 RTX_FRAME_RELATED_P (insn) = 1;
12251 start_reg = LAST_FPA_REGNUM;
12253 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12255 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12257 if (start_reg - reg == 3)
12259 insn = emit_sfm (reg, 4);
12260 RTX_FRAME_RELATED_P (insn) = 1;
12262 start_reg = reg - 1;
12267 if (start_reg != reg)
12269 insn = emit_sfm (reg + 1, start_reg - reg);
12270 RTX_FRAME_RELATED_P (insn) = 1;
12271 saved_size += (start_reg - reg) * 12;
12273 start_reg = reg - 1;
12277 if (start_reg != reg)
12279 insn = emit_sfm (reg + 1, start_reg - reg);
12280 saved_size += (start_reg - reg) * 12;
12281 RTX_FRAME_RELATED_P (insn) = 1;
12284 if (TARGET_HARD_FLOAT && TARGET_VFP)
12286 start_reg = FIRST_VFP_REGNUM;
12288 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12290 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12291 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12293 if (start_reg != reg)
12294 saved_size += vfp_emit_fstmd (start_reg,
12295 (reg - start_reg) / 2);
12296 start_reg = reg + 2;
12299 if (start_reg != reg)
12300 saved_size += vfp_emit_fstmd (start_reg,
12301 (reg - start_reg) / 2);
12307 /* Set the Thumb frame pointer from the stack pointer. */
12310 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12312 HOST_WIDE_INT amount;
12315 amount = offsets->outgoing_args - offsets->locals_base;
12317 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12318 stack_pointer_rtx, GEN_INT (amount)));
12321 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12322 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12323 expects the first two operands to be the same. */
12326 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12328 hard_frame_pointer_rtx));
12332 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12333 hard_frame_pointer_rtx,
12334 stack_pointer_rtx));
12336 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12337 plus_constant (stack_pointer_rtx, amount));
12338 RTX_FRAME_RELATED_P (dwarf) = 1;
12339 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12343 RTX_FRAME_RELATED_P (insn) = 1;
12346 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12349 arm_expand_prologue (void)
12354 unsigned long live_regs_mask;
12355 unsigned long func_type;
12357 int saved_pretend_args = 0;
12358 int saved_regs = 0;
12359 unsigned HOST_WIDE_INT args_to_push;
12360 arm_stack_offsets *offsets;
12362 func_type = arm_current_func_type ();
12364 /* Naked functions don't have prologues. */
12365 if (IS_NAKED (func_type))
12368 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12369 args_to_push = current_function_pretend_args_size;
12371 /* Compute which register we will have to save onto the stack. */
12372 offsets = arm_get_frame_offsets ();
12373 live_regs_mask = offsets->saved_regs_mask;
12375 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12377 if (IS_STACKALIGN (func_type))
12382 /* Handle a word-aligned stack pointer. We generate the following:
12387 <save and restore r0 in normal prologue/epilogue>
12391 The unwinder doesn't need to know about the stack realignment.
12392 Just tell it we saved SP in r0. */
12393 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12395 r0 = gen_rtx_REG (SImode, 0);
12396 r1 = gen_rtx_REG (SImode, 1);
12397 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12398 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12399 insn = gen_movsi (r0, stack_pointer_rtx);
12400 RTX_FRAME_RELATED_P (insn) = 1;
12401 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12402 dwarf, REG_NOTES (insn));
12404 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12405 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12408 /* For APCS frames, if IP register is clobbered
12409 when creating frame, save that register in a special
12411 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12413 if (IS_INTERRUPT (func_type))
12415 /* Interrupt functions must not corrupt any registers.
12416 Creating a frame pointer however, corrupts the IP
12417 register, so we must push it first. */
12418 insn = emit_multi_reg_push (1 << IP_REGNUM);
12420 /* Do not set RTX_FRAME_RELATED_P on this insn.
12421 The dwarf stack unwinding code only wants to see one
12422 stack decrement per function, and this is not it. If
12423 this instruction is labeled as being part of the frame
12424 creation sequence then dwarf2out_frame_debug_expr will
12425 die when it encounters the assignment of IP to FP
12426 later on, since the use of SP here establishes SP as
12427 the CFA register and not IP.
12429 Anyway this instruction is not really part of the stack
12430 frame creation although it is part of the prologue. */
12432 else if (IS_NESTED (func_type))
12434 /* The Static chain register is the same as the IP register
12435 used as a scratch register during stack frame creation.
12436 To get around this need to find somewhere to store IP
12437 whilst the frame is being created. We try the following
12440 1. The last argument register.
12441 2. A slot on the stack above the frame. (This only
12442 works if the function is not a varargs function).
12443 3. Register r3, after pushing the argument registers
12446 Note - we only need to tell the dwarf2 backend about the SP
12447 adjustment in the second variant; the static chain register
12448 doesn't need to be unwound, as it doesn't contain a value
12449 inherited from the caller. */
12451 if (df_regs_ever_live_p (3) == false)
12452 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12453 else if (args_to_push == 0)
12457 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12458 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12461 /* Just tell the dwarf backend that we adjusted SP. */
12462 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12463 plus_constant (stack_pointer_rtx,
12465 RTX_FRAME_RELATED_P (insn) = 1;
12466 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12467 dwarf, REG_NOTES (insn));
12471 /* Store the args on the stack. */
12472 if (cfun->machine->uses_anonymous_args)
12473 insn = emit_multi_reg_push
12474 ((0xf0 >> (args_to_push / 4)) & 0xf);
12477 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12478 GEN_INT (- args_to_push)));
12480 RTX_FRAME_RELATED_P (insn) = 1;
12482 saved_pretend_args = 1;
12483 fp_offset = args_to_push;
12486 /* Now reuse r3 to preserve IP. */
12487 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12491 insn = emit_set_insn (ip_rtx,
12492 plus_constant (stack_pointer_rtx, fp_offset));
12493 RTX_FRAME_RELATED_P (insn) = 1;
12498 /* Push the argument registers, or reserve space for them. */
12499 if (cfun->machine->uses_anonymous_args)
12500 insn = emit_multi_reg_push
12501 ((0xf0 >> (args_to_push / 4)) & 0xf);
12504 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12505 GEN_INT (- args_to_push)));
12506 RTX_FRAME_RELATED_P (insn) = 1;
12509 /* If this is an interrupt service routine, and the link register
12510 is going to be pushed, and we're not generating extra
12511 push of IP (needed when frame is needed and frame layout if apcs),
12512 subtracting four from LR now will mean that the function return
12513 can be done with a single instruction. */
12514 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12515 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12516 && !(frame_pointer_needed && TARGET_APCS_FRAME)
12519 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12521 emit_set_insn (lr, plus_constant (lr, -4));
12524 if (live_regs_mask)
12526 saved_regs += bit_count (live_regs_mask) * 4;
12527 if (optimize_size && !frame_pointer_needed
12528 && saved_regs == offsets->saved_regs - offsets->saved_args)
12530 /* If no coprocessor registers are being pushed and we don't have
12531 to worry about a frame pointer then push extra registers to
12532 create the stack frame. This is done is a way that does not
12533 alter the frame layout, so is independent of the epilogue. */
12537 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
12539 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
12540 if (frame && n * 4 >= frame)
12543 live_regs_mask |= (1 << n) - 1;
12544 saved_regs += frame;
12547 insn = emit_multi_reg_push (live_regs_mask);
12548 RTX_FRAME_RELATED_P (insn) = 1;
12551 if (! IS_VOLATILE (func_type))
12552 saved_regs += arm_save_coproc_regs ();
12554 if (frame_pointer_needed && TARGET_ARM)
12556 /* Create the new frame pointer. */
12557 if (TARGET_APCS_FRAME)
12559 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12560 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12561 RTX_FRAME_RELATED_P (insn) = 1;
12563 if (IS_NESTED (func_type))
12565 /* Recover the static chain register. */
12566 if (!df_regs_ever_live_p (3)
12567 || saved_pretend_args)
12568 insn = gen_rtx_REG (SImode, 3);
12569 else /* if (current_function_pretend_args_size == 0) */
12571 insn = plus_constant (hard_frame_pointer_rtx, 4);
12572 insn = gen_frame_mem (SImode, insn);
12574 emit_set_insn (ip_rtx, insn);
12575 /* Add a USE to stop propagate_one_insn() from barfing. */
12576 emit_insn (gen_prologue_use (ip_rtx));
12581 insn = GEN_INT (saved_regs - 4);
12582 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12583 stack_pointer_rtx, insn));
12584 RTX_FRAME_RELATED_P (insn) = 1;
12588 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12590 /* This add can produce multiple insns for a large constant, so we
12591 need to get tricky. */
12592 rtx last = get_last_insn ();
12594 amount = GEN_INT (offsets->saved_args + saved_regs
12595 - offsets->outgoing_args);
12597 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12601 last = last ? NEXT_INSN (last) : get_insns ();
12602 RTX_FRAME_RELATED_P (last) = 1;
12604 while (last != insn);
12606 /* If the frame pointer is needed, emit a special barrier that
12607 will prevent the scheduler from moving stores to the frame
12608 before the stack adjustment. */
12609 if (frame_pointer_needed)
12610 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12611 hard_frame_pointer_rtx));
12615 if (frame_pointer_needed && TARGET_THUMB2)
12616 thumb_set_frame_pointer (offsets);
12618 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12620 unsigned long mask;
12622 mask = live_regs_mask;
12623 mask &= THUMB2_WORK_REGS;
12624 if (!IS_NESTED (func_type))
12625 mask |= (1 << IP_REGNUM);
12626 arm_load_pic_register (mask);
12629 /* If we are profiling, make sure no instructions are scheduled before
12630 the call to mcount. Similarly if the user has requested no
12631 scheduling in the prolog. Similarly if we want non-call exceptions
12632 using the EABI unwinder, to prevent faulting instructions from being
12633 swapped with a stack adjustment. */
12634 if (current_function_profile || !TARGET_SCHED_PROLOG
12635 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12636 emit_insn (gen_blockage ());
12638 /* If the link register is being kept alive, with the return address in it,
12639 then make sure that it does not get reused by the ce2 pass. */
12640 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12641 cfun->machine->lr_save_eliminated = 1;
12644 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12646 arm_print_condition (FILE *stream)
12648 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12650 /* Branch conversion is not implemented for Thumb-2. */
12653 output_operand_lossage ("predicated Thumb instruction");
12656 if (current_insn_predicate != NULL)
12658 output_operand_lossage
12659 ("predicated instruction in conditional sequence");
12663 fputs (arm_condition_codes[arm_current_cc], stream);
12665 else if (current_insn_predicate)
12667 enum arm_cond_code code;
12671 output_operand_lossage ("predicated Thumb instruction");
12675 code = get_arm_condition_code (current_insn_predicate);
12676 fputs (arm_condition_codes[code], stream);
12681 /* If CODE is 'd', then the X is a condition operand and the instruction
12682 should only be executed if the condition is true.
12683 if CODE is 'D', then the X is a condition operand and the instruction
12684 should only be executed if the condition is false: however, if the mode
12685 of the comparison is CCFPEmode, then always execute the instruction -- we
12686 do this because in these circumstances !GE does not necessarily imply LT;
12687 in these cases the instruction pattern will take care to make sure that
12688 an instruction containing %d will follow, thereby undoing the effects of
12689 doing this instruction unconditionally.
12690 If CODE is 'N' then X is a floating point operand that must be negated
12692 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12693 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12695 arm_print_operand (FILE *stream, rtx x, int code)
12700 fputs (ASM_COMMENT_START, stream);
12704 fputs (user_label_prefix, stream);
12708 fputs (REGISTER_PREFIX, stream);
12712 arm_print_condition (stream);
12716 /* Nothing in unified syntax, otherwise the current condition code. */
12717 if (!TARGET_UNIFIED_ASM)
12718 arm_print_condition (stream);
12722 /* The current condition code in unified syntax, otherwise nothing. */
12723 if (TARGET_UNIFIED_ASM)
12724 arm_print_condition (stream);
12728 /* The current condition code for a condition code setting instruction.
12729 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12730 if (TARGET_UNIFIED_ASM)
12732 fputc('s', stream);
12733 arm_print_condition (stream);
12737 arm_print_condition (stream);
12738 fputc('s', stream);
12743 /* If the instruction is conditionally executed then print
12744 the current condition code, otherwise print 's'. */
12745 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12746 if (current_insn_predicate)
12747 arm_print_condition (stream);
12749 fputc('s', stream);
12752 /* %# is a "break" sequence. It doesn't output anything, but is used to
12753 separate e.g. operand numbers from following text, if that text consists
12754 of further digits which we don't want to be part of the operand
12762 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12763 r = REAL_VALUE_NEGATE (r);
12764 fprintf (stream, "%s", fp_const_from_val (&r));
12768 /* An integer without a preceding # sign. */
12770 gcc_assert (GET_CODE (x) == CONST_INT);
12771 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12775 if (GET_CODE (x) == CONST_INT)
12778 val = ARM_SIGN_EXTEND (~INTVAL (x));
12779 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12783 putc ('~', stream);
12784 output_addr_const (stream, x);
12789 /* The low 16 bits of an immediate constant. */
12790 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12794 fprintf (stream, "%s", arithmetic_instr (x, 1));
12797 /* Truncate Cirrus shift counts. */
12799 if (GET_CODE (x) == CONST_INT)
12801 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12804 arm_print_operand (stream, x, 0);
12808 fprintf (stream, "%s", arithmetic_instr (x, 0));
12816 if (!shift_operator (x, SImode))
12818 output_operand_lossage ("invalid shift operand");
12822 shift = shift_op (x, &val);
12826 fprintf (stream, ", %s ", shift);
12828 arm_print_operand (stream, XEXP (x, 1), 0);
12830 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12835 /* An explanation of the 'Q', 'R' and 'H' register operands:
12837 In a pair of registers containing a DI or DF value the 'Q'
12838 operand returns the register number of the register containing
12839 the least significant part of the value. The 'R' operand returns
12840 the register number of the register containing the most
12841 significant part of the value.
12843 The 'H' operand returns the higher of the two register numbers.
12844 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12845 same as the 'Q' operand, since the most significant part of the
12846 value is held in the lower number register. The reverse is true
12847 on systems where WORDS_BIG_ENDIAN is false.
12849 The purpose of these operands is to distinguish between cases
12850 where the endian-ness of the values is important (for example
12851 when they are added together), and cases where the endian-ness
12852 is irrelevant, but the order of register operations is important.
12853 For example when loading a value from memory into a register
12854 pair, the endian-ness does not matter. Provided that the value
12855 from the lower memory address is put into the lower numbered
12856 register, and the value from the higher address is put into the
12857 higher numbered register, the load will work regardless of whether
12858 the value being loaded is big-wordian or little-wordian. The
12859 order of the two register loads can matter however, if the address
12860 of the memory location is actually held in one of the registers
12861 being overwritten by the load. */
12863 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12865 output_operand_lossage ("invalid operand for code '%c'", code);
12869 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12873 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12875 output_operand_lossage ("invalid operand for code '%c'", code);
12879 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12883 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12885 output_operand_lossage ("invalid operand for code '%c'", code);
12889 asm_fprintf (stream, "%r", REGNO (x) + 1);
12893 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12895 output_operand_lossage ("invalid operand for code '%c'", code);
12899 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12903 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12905 output_operand_lossage ("invalid operand for code '%c'", code);
12909 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12913 asm_fprintf (stream, "%r",
12914 GET_CODE (XEXP (x, 0)) == REG
12915 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12919 asm_fprintf (stream, "{%r-%r}",
12921 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12924 /* Like 'M', but writing doubleword vector registers, for use by Neon
12928 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12929 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12931 asm_fprintf (stream, "{d%d}", regno);
12933 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12938 /* CONST_TRUE_RTX means always -- that's the default. */
12939 if (x == const_true_rtx)
12942 if (!COMPARISON_P (x))
12944 output_operand_lossage ("invalid operand for code '%c'", code);
12948 fputs (arm_condition_codes[get_arm_condition_code (x)],
12953 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12954 want to do that. */
12955 if (x == const_true_rtx)
12957 output_operand_lossage ("instruction never executed");
12960 if (!COMPARISON_P (x))
12962 output_operand_lossage ("invalid operand for code '%c'", code);
12966 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
12967 (get_arm_condition_code (x))],
12971 /* Cirrus registers can be accessed in a variety of ways:
12972 single floating point (f)
12973 double floating point (d)
12975 64bit integer (dx). */
12976 case 'W': /* Cirrus register in F mode. */
12977 case 'X': /* Cirrus register in D mode. */
12978 case 'Y': /* Cirrus register in FX mode. */
12979 case 'Z': /* Cirrus register in DX mode. */
12980 gcc_assert (GET_CODE (x) == REG
12981 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
12983 fprintf (stream, "mv%s%s",
12985 : code == 'X' ? "d"
12986 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
12990 /* Print cirrus register in the mode specified by the register's mode. */
12993 int mode = GET_MODE (x);
12995 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
12997 output_operand_lossage ("invalid operand for code '%c'", code);
13001 fprintf (stream, "mv%s%s",
13002 mode == DFmode ? "d"
13003 : mode == SImode ? "fx"
13004 : mode == DImode ? "dx"
13005 : "f", reg_names[REGNO (x)] + 2);
13011 if (GET_CODE (x) != REG
13012 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13013 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13014 /* Bad value for wCG register number. */
13016 output_operand_lossage ("invalid operand for code '%c'", code);
13021 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13024 /* Print an iWMMXt control register name. */
13026 if (GET_CODE (x) != CONST_INT
13028 || INTVAL (x) >= 16)
13029 /* Bad value for wC register number. */
13031 output_operand_lossage ("invalid operand for code '%c'", code);
13037 static const char * wc_reg_names [16] =
13039 "wCID", "wCon", "wCSSF", "wCASF",
13040 "wC4", "wC5", "wC6", "wC7",
13041 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13042 "wC12", "wC13", "wC14", "wC15"
13045 fprintf (stream, wc_reg_names [INTVAL (x)]);
13049 /* Print a VFP/Neon double precision or quad precision register name. */
13053 int mode = GET_MODE (x);
13054 int is_quad = (code == 'q');
13057 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13059 output_operand_lossage ("invalid operand for code '%c'", code);
13063 if (GET_CODE (x) != REG
13064 || !IS_VFP_REGNUM (REGNO (x)))
13066 output_operand_lossage ("invalid operand for code '%c'", code);
13071 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13072 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13074 output_operand_lossage ("invalid operand for code '%c'", code);
13078 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13079 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13083 /* These two codes print the low/high doubleword register of a Neon quad
13084 register, respectively. For pair-structure types, can also print
13085 low/high quadword registers. */
13089 int mode = GET_MODE (x);
13092 if ((GET_MODE_SIZE (mode) != 16
13093 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13095 output_operand_lossage ("invalid operand for code '%c'", code);
13100 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13102 output_operand_lossage ("invalid operand for code '%c'", code);
13106 if (GET_MODE_SIZE (mode) == 16)
13107 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13108 + (code == 'f' ? 1 : 0));
13110 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13111 + (code == 'f' ? 1 : 0));
13115 /* Print a VFPv3 floating-point constant, represented as an integer
13119 int index = vfp3_const_double_index (x);
13120 gcc_assert (index != -1);
13121 fprintf (stream, "%d", index);
13125 /* Print bits representing opcode features for Neon.
13127 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13128 and polynomials as unsigned.
13130 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13132 Bit 2 is 1 for rounding functions, 0 otherwise. */
13134 /* Identify the type as 's', 'u', 'p' or 'f'. */
13137 HOST_WIDE_INT bits = INTVAL (x);
13138 fputc ("uspf"[bits & 3], stream);
13142 /* Likewise, but signed and unsigned integers are both 'i'. */
13145 HOST_WIDE_INT bits = INTVAL (x);
13146 fputc ("iipf"[bits & 3], stream);
13150 /* As for 'T', but emit 'u' instead of 'p'. */
13153 HOST_WIDE_INT bits = INTVAL (x);
13154 fputc ("usuf"[bits & 3], stream);
13158 /* Bit 2: rounding (vs none). */
13161 HOST_WIDE_INT bits = INTVAL (x);
13162 fputs ((bits & 4) != 0 ? "r" : "", stream);
13169 output_operand_lossage ("missing operand");
13173 switch (GET_CODE (x))
13176 asm_fprintf (stream, "%r", REGNO (x));
13180 output_memory_reference_mode = GET_MODE (x);
13181 output_address (XEXP (x, 0));
13188 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13189 sizeof (fpstr), 0, 1);
13190 fprintf (stream, "#%s", fpstr);
13193 fprintf (stream, "#%s", fp_immediate_constant (x));
13197 gcc_assert (GET_CODE (x) != NEG);
13198 fputc ('#', stream);
13199 output_addr_const (stream, x);
13205 /* Target hook for assembling integer objects. The ARM version needs to
13206 handle word-sized values specially. */
13208 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13210 enum machine_mode mode;
13212 if (size == UNITS_PER_WORD && aligned_p)
13214 fputs ("\t.word\t", asm_out_file);
13215 output_addr_const (asm_out_file, x);
13217 /* Mark symbols as position independent. We only do this in the
13218 .text segment, not in the .data segment. */
13219 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13220 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13222 /* See legitimize_pic_address for an explanation of the
13223 TARGET_VXWORKS_RTP check. */
13224 if (TARGET_VXWORKS_RTP
13225 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13226 fputs ("(GOT)", asm_out_file);
13228 fputs ("(GOTOFF)", asm_out_file);
13230 fputc ('\n', asm_out_file);
13234 mode = GET_MODE (x);
13236 if (arm_vector_mode_supported_p (mode))
13239 unsigned int invmask = 0, parts_per_word;
13241 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13243 units = CONST_VECTOR_NUNITS (x);
13244 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13246 /* For big-endian Neon vectors, we must permute the vector to the form
13247 which, when loaded by a VLDR or VLDM instruction, will give a vector
13248 with the elements in the right order. */
13249 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13251 parts_per_word = UNITS_PER_WORD / size;
13252 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13253 support those anywhere yet. */
13254 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
13257 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13258 for (i = 0; i < units; i++)
13260 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13262 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13265 for (i = 0; i < units; i++)
13267 rtx elt = CONST_VECTOR_ELT (x, i);
13268 REAL_VALUE_TYPE rval;
13270 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13273 (rval, GET_MODE_INNER (mode),
13274 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13280 return default_assemble_integer (x, size, aligned_p);
13284 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13288 if (!TARGET_AAPCS_BASED)
13291 default_named_section_asm_out_constructor
13292 : default_named_section_asm_out_destructor) (symbol, priority);
13296 /* Put these in the .init_array section, using a special relocation. */
13297 if (priority != DEFAULT_INIT_PRIORITY)
13300 sprintf (buf, "%s.%.5u",
13301 is_ctor ? ".init_array" : ".fini_array",
13303 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13310 switch_to_section (s);
13311 assemble_align (POINTER_SIZE);
13312 fputs ("\t.word\t", asm_out_file);
13313 output_addr_const (asm_out_file, symbol);
13314 fputs ("(target1)\n", asm_out_file);
13317 /* Add a function to the list of static constructors. */
13320 arm_elf_asm_constructor (rtx symbol, int priority)
13322 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13325 /* Add a function to the list of static destructors. */
13328 arm_elf_asm_destructor (rtx symbol, int priority)
13330 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13333 /* A finite state machine takes care of noticing whether or not instructions
13334 can be conditionally executed, and thus decrease execution time and code
13335 size by deleting branch instructions. The fsm is controlled by
13336 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13338 /* The state of the fsm controlling condition codes are:
13339 0: normal, do nothing special
13340 1: make ASM_OUTPUT_OPCODE not output this instruction
13341 2: make ASM_OUTPUT_OPCODE not output this instruction
13342 3: make instructions conditional
13343 4: make instructions conditional
13345 State transitions (state->state by whom under condition):
13346 0 -> 1 final_prescan_insn if the `target' is a label
13347 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13348 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13349 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13350 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13351 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13352 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13353 (the target insn is arm_target_insn).
13355 If the jump clobbers the conditions then we use states 2 and 4.
13357 A similar thing can be done with conditional return insns.
13359 XXX In case the `target' is an unconditional branch, this conditionalising
13360 of the instructions always reduces code size, but not always execution
13361 time. But then, I want to reduce the code size to somewhere near what
13362 /bin/cc produces. */
13364 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13365 instructions. When a COND_EXEC instruction is seen the subsequent
13366 instructions are scanned so that multiple conditional instructions can be
13367 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13368 specify the length and true/false mask for the IT block. These will be
13369 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
13371 /* Returns the index of the ARM condition code string in
13372 `arm_condition_codes'. COMPARISON should be an rtx like
13373 `(eq (...) (...))'. */
13374 static enum arm_cond_code
13375 get_arm_condition_code (rtx comparison)
13377 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13379 enum rtx_code comp_code = GET_CODE (comparison);
13381 if (GET_MODE_CLASS (mode) != MODE_CC)
13382 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13383 XEXP (comparison, 1));
13387 case CC_DNEmode: code = ARM_NE; goto dominance;
13388 case CC_DEQmode: code = ARM_EQ; goto dominance;
13389 case CC_DGEmode: code = ARM_GE; goto dominance;
13390 case CC_DGTmode: code = ARM_GT; goto dominance;
13391 case CC_DLEmode: code = ARM_LE; goto dominance;
13392 case CC_DLTmode: code = ARM_LT; goto dominance;
13393 case CC_DGEUmode: code = ARM_CS; goto dominance;
13394 case CC_DGTUmode: code = ARM_HI; goto dominance;
13395 case CC_DLEUmode: code = ARM_LS; goto dominance;
13396 case CC_DLTUmode: code = ARM_CC;
13399 gcc_assert (comp_code == EQ || comp_code == NE);
13401 if (comp_code == EQ)
13402 return ARM_INVERSE_CONDITION_CODE (code);
13408 case NE: return ARM_NE;
13409 case EQ: return ARM_EQ;
13410 case GE: return ARM_PL;
13411 case LT: return ARM_MI;
13412 default: gcc_unreachable ();
13418 case NE: return ARM_NE;
13419 case EQ: return ARM_EQ;
13420 default: gcc_unreachable ();
13426 case NE: return ARM_MI;
13427 case EQ: return ARM_PL;
13428 default: gcc_unreachable ();
13433 /* These encodings assume that AC=1 in the FPA system control
13434 byte. This allows us to handle all cases except UNEQ and
13438 case GE: return ARM_GE;
13439 case GT: return ARM_GT;
13440 case LE: return ARM_LS;
13441 case LT: return ARM_MI;
13442 case NE: return ARM_NE;
13443 case EQ: return ARM_EQ;
13444 case ORDERED: return ARM_VC;
13445 case UNORDERED: return ARM_VS;
13446 case UNLT: return ARM_LT;
13447 case UNLE: return ARM_LE;
13448 case UNGT: return ARM_HI;
13449 case UNGE: return ARM_PL;
13450 /* UNEQ and LTGT do not have a representation. */
13451 case UNEQ: /* Fall through. */
13452 case LTGT: /* Fall through. */
13453 default: gcc_unreachable ();
13459 case NE: return ARM_NE;
13460 case EQ: return ARM_EQ;
13461 case GE: return ARM_LE;
13462 case GT: return ARM_LT;
13463 case LE: return ARM_GE;
13464 case LT: return ARM_GT;
13465 case GEU: return ARM_LS;
13466 case GTU: return ARM_CC;
13467 case LEU: return ARM_CS;
13468 case LTU: return ARM_HI;
13469 default: gcc_unreachable ();
13475 case LTU: return ARM_CS;
13476 case GEU: return ARM_CC;
13477 default: gcc_unreachable ();
13483 case NE: return ARM_NE;
13484 case EQ: return ARM_EQ;
13485 case GE: return ARM_GE;
13486 case GT: return ARM_GT;
13487 case LE: return ARM_LE;
13488 case LT: return ARM_LT;
13489 case GEU: return ARM_CS;
13490 case GTU: return ARM_HI;
13491 case LEU: return ARM_LS;
13492 case LTU: return ARM_CC;
13493 default: gcc_unreachable ();
13496 default: gcc_unreachable ();
13500 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13503 thumb2_final_prescan_insn (rtx insn)
13505 rtx first_insn = insn;
13506 rtx body = PATTERN (insn);
13508 enum arm_cond_code code;
13512 /* Remove the previous insn from the count of insns to be output. */
13513 if (arm_condexec_count)
13514 arm_condexec_count--;
13516 /* Nothing to do if we are already inside a conditional block. */
13517 if (arm_condexec_count)
13520 if (GET_CODE (body) != COND_EXEC)
13523 /* Conditional jumps are implemented directly. */
13524 if (GET_CODE (insn) == JUMP_INSN)
13527 predicate = COND_EXEC_TEST (body);
13528 arm_current_cc = get_arm_condition_code (predicate);
13530 n = get_attr_ce_count (insn);
13531 arm_condexec_count = 1;
13532 arm_condexec_mask = (1 << n) - 1;
13533 arm_condexec_masklen = n;
13534 /* See if subsequent instructions can be combined into the same block. */
13537 insn = next_nonnote_insn (insn);
13539 /* Jumping into the middle of an IT block is illegal, so a label or
13540 barrier terminates the block. */
13541 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13544 body = PATTERN (insn);
13545 /* USE and CLOBBER aren't really insns, so just skip them. */
13546 if (GET_CODE (body) == USE
13547 || GET_CODE (body) == CLOBBER)
13550 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13551 if (GET_CODE (body) != COND_EXEC)
13553 /* Allow up to 4 conditionally executed instructions in a block. */
13554 n = get_attr_ce_count (insn);
13555 if (arm_condexec_masklen + n > 4)
13558 predicate = COND_EXEC_TEST (body);
13559 code = get_arm_condition_code (predicate);
13560 mask = (1 << n) - 1;
13561 if (arm_current_cc == code)
13562 arm_condexec_mask |= (mask << arm_condexec_masklen);
13563 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13566 arm_condexec_count++;
13567 arm_condexec_masklen += n;
13569 /* A jump must be the last instruction in a conditional block. */
13570 if (GET_CODE(insn) == JUMP_INSN)
13573 /* Restore recog_data (getting the attributes of other insns can
13574 destroy this array, but final.c assumes that it remains intact
13575 across this call). */
13576 extract_constrain_insn_cached (first_insn);
13580 arm_final_prescan_insn (rtx insn)
13582 /* BODY will hold the body of INSN. */
13583 rtx body = PATTERN (insn);
13585 /* This will be 1 if trying to repeat the trick, and things need to be
13586 reversed if it appears to fail. */
13589 /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
13590 taken are clobbered, even if the rtl suggests otherwise. It also
13591 means that we have to grub around within the jump expression to find
13592 out what the conditions are when the jump isn't taken. */
13593 int jump_clobbers = 0;
13595 /* If we start with a return insn, we only succeed if we find another one. */
13596 int seeking_return = 0;
13598 /* START_INSN will hold the insn from where we start looking. This is the
13599 first insn after the following code_label if REVERSE is true. */
13600 rtx start_insn = insn;
13602 /* If in state 4, check if the target branch is reached, in order to
13603 change back to state 0. */
13604 if (arm_ccfsm_state == 4)
13606 if (insn == arm_target_insn)
13608 arm_target_insn = NULL;
13609 arm_ccfsm_state = 0;
13614 /* If in state 3, it is possible to repeat the trick, if this insn is an
13615 unconditional branch to a label, and immediately following this branch
13616 is the previous target label which is only used once, and the label this
13617 branch jumps to is not too far off. */
13618 if (arm_ccfsm_state == 3)
13620 if (simplejump_p (insn))
13622 start_insn = next_nonnote_insn (start_insn);
13623 if (GET_CODE (start_insn) == BARRIER)
13625 /* XXX Isn't this always a barrier? */
13626 start_insn = next_nonnote_insn (start_insn);
13628 if (GET_CODE (start_insn) == CODE_LABEL
13629 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13630 && LABEL_NUSES (start_insn) == 1)
13635 else if (GET_CODE (body) == RETURN)
13637 start_insn = next_nonnote_insn (start_insn);
13638 if (GET_CODE (start_insn) == BARRIER)
13639 start_insn = next_nonnote_insn (start_insn);
13640 if (GET_CODE (start_insn) == CODE_LABEL
13641 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13642 && LABEL_NUSES (start_insn) == 1)
13645 seeking_return = 1;
13654 gcc_assert (!arm_ccfsm_state || reverse);
13655 if (GET_CODE (insn) != JUMP_INSN)
13658 /* This jump might be paralleled with a clobber of the condition codes
13659 the jump should always come first */
13660 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13661 body = XVECEXP (body, 0, 0);
13664 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13665 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13668 int fail = FALSE, succeed = FALSE;
13669 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13670 int then_not_else = TRUE;
13671 rtx this_insn = start_insn, label = 0;
13673 /* If the jump cannot be done with one instruction, we cannot
13674 conditionally execute the instruction in the inverse case. */
13675 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13681 /* Register the insn jumped to. */
13684 if (!seeking_return)
13685 label = XEXP (SET_SRC (body), 0);
13687 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13688 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13689 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13691 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13692 then_not_else = FALSE;
13694 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13695 seeking_return = 1;
13696 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13698 seeking_return = 1;
13699 then_not_else = FALSE;
13702 gcc_unreachable ();
13704 /* See how many insns this branch skips, and what kind of insns. If all
13705 insns are okay, and the label or unconditional branch to the same
13706 label is not too far away, succeed. */
13707 for (insns_skipped = 0;
13708 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13712 this_insn = next_nonnote_insn (this_insn);
13716 switch (GET_CODE (this_insn))
13719 /* Succeed if it is the target label, otherwise fail since
13720 control falls in from somewhere else. */
13721 if (this_insn == label)
13725 arm_ccfsm_state = 2;
13726 this_insn = next_nonnote_insn (this_insn);
13729 arm_ccfsm_state = 1;
13737 /* Succeed if the following insn is the target label.
13739 If return insns are used then the last insn in a function
13740 will be a barrier. */
13741 this_insn = next_nonnote_insn (this_insn);
13742 if (this_insn && this_insn == label)
13746 arm_ccfsm_state = 2;
13747 this_insn = next_nonnote_insn (this_insn);
13750 arm_ccfsm_state = 1;
13758 /* The AAPCS says that conditional calls should not be
13759 used since they make interworking inefficient (the
13760 linker can't transform BL<cond> into BLX). That's
13761 only a problem if the machine has BLX. */
13768 /* Succeed if the following insn is the target label, or
13769 if the following two insns are a barrier and the
13771 this_insn = next_nonnote_insn (this_insn);
13772 if (this_insn && GET_CODE (this_insn) == BARRIER)
13773 this_insn = next_nonnote_insn (this_insn);
13775 if (this_insn && this_insn == label
13776 && insns_skipped < max_insns_skipped)
13780 arm_ccfsm_state = 2;
13781 this_insn = next_nonnote_insn (this_insn);
13784 arm_ccfsm_state = 1;
13792 /* If this is an unconditional branch to the same label, succeed.
13793 If it is to another label, do nothing. If it is conditional,
13795 /* XXX Probably, the tests for SET and the PC are
13798 scanbody = PATTERN (this_insn);
13799 if (GET_CODE (scanbody) == SET
13800 && GET_CODE (SET_DEST (scanbody)) == PC)
13802 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13803 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13805 arm_ccfsm_state = 2;
13808 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13811 /* Fail if a conditional return is undesirable (e.g. on a
13812 StrongARM), but still allow this if optimizing for size. */
13813 else if (GET_CODE (scanbody) == RETURN
13814 && !use_return_insn (TRUE, NULL)
13817 else if (GET_CODE (scanbody) == RETURN
13820 arm_ccfsm_state = 2;
13823 else if (GET_CODE (scanbody) == PARALLEL)
13825 switch (get_attr_conds (this_insn))
13835 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13840 /* Instructions using or affecting the condition codes make it
13842 scanbody = PATTERN (this_insn);
13843 if (!(GET_CODE (scanbody) == SET
13844 || GET_CODE (scanbody) == PARALLEL)
13845 || get_attr_conds (this_insn) != CONDS_NOCOND)
13848 /* A conditional cirrus instruction must be followed by
13849 a non Cirrus instruction. However, since we
13850 conditionalize instructions in this function and by
13851 the time we get here we can't add instructions
13852 (nops), because shorten_branches() has already been
13853 called, we will disable conditionalizing Cirrus
13854 instructions to be safe. */
13855 if (GET_CODE (scanbody) != USE
13856 && GET_CODE (scanbody) != CLOBBER
13857 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13867 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13868 arm_target_label = CODE_LABEL_NUMBER (label);
13871 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13873 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13875 this_insn = next_nonnote_insn (this_insn);
13876 gcc_assert (!this_insn
13877 || (GET_CODE (this_insn) != BARRIER
13878 && GET_CODE (this_insn) != CODE_LABEL));
13882 /* Oh, dear! we ran off the end.. give up. */
13883 extract_constrain_insn_cached (insn);
13884 arm_ccfsm_state = 0;
13885 arm_target_insn = NULL;
13888 arm_target_insn = this_insn;
13892 gcc_assert (!reverse);
13894 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13896 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13897 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13898 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13899 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13903 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13906 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13910 if (reverse || then_not_else)
13911 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13914 /* Restore recog_data (getting the attributes of other insns can
13915 destroy this array, but final.c assumes that it remains intact
13916 across this call. */
13917 extract_constrain_insn_cached (insn);
13921 /* Output IT instructions. */
13923 thumb2_asm_output_opcode (FILE * stream)
13928 if (arm_condexec_mask)
13930 for (n = 0; n < arm_condexec_masklen; n++)
13931 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13933 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13934 arm_condition_codes[arm_current_cc]);
13935 arm_condexec_mask = 0;
13939 /* Returns true if REGNO is a valid register
13940 for holding a quantity of type MODE. */
13942 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13944 if (GET_MODE_CLASS (mode) == MODE_CC)
13945 return (regno == CC_REGNUM
13946 || (TARGET_HARD_FLOAT && TARGET_VFP
13947 && regno == VFPCC_REGNUM));
13950 /* For the Thumb we only allow values bigger than SImode in
13951 registers 0 - 6, so that there is always a second low
13952 register available to hold the upper part of the value.
13953 We probably we ought to ensure that the register is the
13954 start of an even numbered register pair. */
13955 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13957 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13958 && IS_CIRRUS_REGNUM (regno))
13959 /* We have outlawed SI values in Cirrus registers because they
13960 reside in the lower 32 bits, but SF values reside in the
13961 upper 32 bits. This causes gcc all sorts of grief. We can't
13962 even split the registers into pairs because Cirrus SI values
13963 get sign extended to 64bits-- aldyh. */
13964 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13966 if (TARGET_HARD_FLOAT && TARGET_VFP
13967 && IS_VFP_REGNUM (regno))
13969 if (mode == SFmode || mode == SImode)
13970 return VFP_REGNO_OK_FOR_SINGLE (regno);
13972 if (mode == DFmode)
13973 return VFP_REGNO_OK_FOR_DOUBLE (regno);
13976 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
13977 || (VALID_NEON_QREG_MODE (mode)
13978 && NEON_REGNO_OK_FOR_QUAD (regno))
13979 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
13980 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
13981 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
13982 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
13983 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
13988 if (TARGET_REALLY_IWMMXT)
13990 if (IS_IWMMXT_GR_REGNUM (regno))
13991 return mode == SImode;
13993 if (IS_IWMMXT_REGNUM (regno))
13994 return VALID_IWMMXT_REG_MODE (mode);
13997 /* We allow any value to be stored in the general registers.
13998 Restrict doubleword quantities to even register pairs so that we can
13999 use ldrd. Do not allow Neon structure opaque modes in general registers;
14000 they would use too many. */
14001 if (regno <= LAST_ARM_REGNUM)
14002 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14003 && !VALID_NEON_STRUCT_MODE (mode);
14005 if (regno == FRAME_POINTER_REGNUM
14006 || regno == ARG_POINTER_REGNUM)
14007 /* We only allow integers in the fake hard registers. */
14008 return GET_MODE_CLASS (mode) == MODE_INT;
14010 /* The only registers left are the FPA registers
14011 which we only allow to hold FP values. */
14012 return (TARGET_HARD_FLOAT && TARGET_FPA
14013 && GET_MODE_CLASS (mode) == MODE_FLOAT
14014 && regno >= FIRST_FPA_REGNUM
14015 && regno <= LAST_FPA_REGNUM);
14018 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14019 not used in arm mode. */
14021 arm_regno_class (int regno)
14025 if (regno == STACK_POINTER_REGNUM)
14027 if (regno == CC_REGNUM)
14034 if (TARGET_THUMB2 && regno < 8)
14037 if ( regno <= LAST_ARM_REGNUM
14038 || regno == FRAME_POINTER_REGNUM
14039 || regno == ARG_POINTER_REGNUM)
14040 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14042 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14043 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14045 if (IS_CIRRUS_REGNUM (regno))
14046 return CIRRUS_REGS;
14048 if (IS_VFP_REGNUM (regno))
14050 if (regno <= D7_VFP_REGNUM)
14051 return VFP_D0_D7_REGS;
14052 else if (regno <= LAST_LO_VFP_REGNUM)
14053 return VFP_LO_REGS;
14055 return VFP_HI_REGS;
14058 if (IS_IWMMXT_REGNUM (regno))
14059 return IWMMXT_REGS;
14061 if (IS_IWMMXT_GR_REGNUM (regno))
14062 return IWMMXT_GR_REGS;
14067 /* Handle a special case when computing the offset
14068 of an argument from the frame pointer. */
14070 arm_debugger_arg_offset (int value, rtx addr)
14074 /* We are only interested if dbxout_parms() failed to compute the offset. */
14078 /* We can only cope with the case where the address is held in a register. */
14079 if (GET_CODE (addr) != REG)
14082 /* If we are using the frame pointer to point at the argument, then
14083 an offset of 0 is correct. */
14084 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14087 /* If we are using the stack pointer to point at the
14088 argument, then an offset of 0 is correct. */
14089 /* ??? Check this is consistent with thumb2 frame layout. */
14090 if ((TARGET_THUMB || !frame_pointer_needed)
14091 && REGNO (addr) == SP_REGNUM)
14094 /* Oh dear. The argument is pointed to by a register rather
14095 than being held in a register, or being stored at a known
14096 offset from the frame pointer. Since GDB only understands
14097 those two kinds of argument we must translate the address
14098 held in the register into an offset from the frame pointer.
14099 We do this by searching through the insns for the function
14100 looking to see where this register gets its value. If the
14101 register is initialized from the frame pointer plus an offset
14102 then we are in luck and we can continue, otherwise we give up.
14104 This code is exercised by producing debugging information
14105 for a function with arguments like this:
14107 double func (double a, double b, int c, double d) {return d;}
14109 Without this code the stab for parameter 'd' will be set to
14110 an offset of 0 from the frame pointer, rather than 8. */
14112 /* The if() statement says:
14114 If the insn is a normal instruction
14115 and if the insn is setting the value in a register
14116 and if the register being set is the register holding the address of the argument
14117 and if the address is computing by an addition
14118 that involves adding to a register
14119 which is the frame pointer
14124 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14126 if ( GET_CODE (insn) == INSN
14127 && GET_CODE (PATTERN (insn)) == SET
14128 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14129 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14130 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14131 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14132 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14135 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14144 warning (0, "unable to compute real location of stacked parameter");
14145 value = 8; /* XXX magic hack */
14151 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14154 if ((MASK) & insn_flags) \
14155 add_builtin_function ((NAME), (TYPE), (CODE), \
14156 BUILT_IN_MD, NULL, NULL_TREE); \
14160 struct builtin_description
14162 const unsigned int mask;
14163 const enum insn_code icode;
14164 const char * const name;
14165 const enum arm_builtins code;
14166 const enum rtx_code comparison;
14167 const unsigned int flag;
14170 static const struct builtin_description bdesc_2arg[] =
14172 #define IWMMXT_BUILTIN(code, string, builtin) \
14173 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14174 ARM_BUILTIN_##builtin, 0, 0 },
14176 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14177 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14178 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14179 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14180 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14181 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14182 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14183 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14184 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14185 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14186 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14187 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14188 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14189 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14190 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14191 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14192 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14193 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14194 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14195 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14196 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14197 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14198 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14199 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14200 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14201 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14202 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14203 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14204 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14205 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14206 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14207 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14208 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14209 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14210 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14211 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14212 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14213 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14214 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14215 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14216 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14217 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14218 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14219 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14220 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14221 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14222 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14223 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14224 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14225 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14226 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14227 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14228 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14229 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14230 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14231 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14232 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14233 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14235 #define IWMMXT_BUILTIN2(code, builtin) \
14236 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14238 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14239 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14240 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14241 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14242 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14243 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14244 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14245 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14246 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14247 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14248 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14249 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14250 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14251 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14252 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14253 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14254 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14255 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14256 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14257 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14258 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14259 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14260 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14261 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14262 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14263 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14264 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14265 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14266 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14267 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14268 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14269 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14272 static const struct builtin_description bdesc_1arg[] =
14274 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14275 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14276 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14277 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14278 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14279 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14280 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14281 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14282 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14283 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14284 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14285 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14286 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14287 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14288 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14289 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14290 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14291 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14294 /* Set up all the iWMMXt builtins. This is
14295 not called if TARGET_IWMMXT is zero. */
14298 arm_init_iwmmxt_builtins (void)
14300 const struct builtin_description * d;
14302 tree endlink = void_list_node;
14304 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14305 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14306 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14309 = build_function_type (integer_type_node,
14310 tree_cons (NULL_TREE, integer_type_node, endlink));
14311 tree v8qi_ftype_v8qi_v8qi_int
14312 = build_function_type (V8QI_type_node,
14313 tree_cons (NULL_TREE, V8QI_type_node,
14314 tree_cons (NULL_TREE, V8QI_type_node,
14315 tree_cons (NULL_TREE,
14318 tree v4hi_ftype_v4hi_int
14319 = build_function_type (V4HI_type_node,
14320 tree_cons (NULL_TREE, V4HI_type_node,
14321 tree_cons (NULL_TREE, integer_type_node,
14323 tree v2si_ftype_v2si_int
14324 = build_function_type (V2SI_type_node,
14325 tree_cons (NULL_TREE, V2SI_type_node,
14326 tree_cons (NULL_TREE, integer_type_node,
14328 tree v2si_ftype_di_di
14329 = build_function_type (V2SI_type_node,
14330 tree_cons (NULL_TREE, long_long_integer_type_node,
14331 tree_cons (NULL_TREE, long_long_integer_type_node,
14333 tree di_ftype_di_int
14334 = build_function_type (long_long_integer_type_node,
14335 tree_cons (NULL_TREE, long_long_integer_type_node,
14336 tree_cons (NULL_TREE, integer_type_node,
14338 tree di_ftype_di_int_int
14339 = build_function_type (long_long_integer_type_node,
14340 tree_cons (NULL_TREE, long_long_integer_type_node,
14341 tree_cons (NULL_TREE, integer_type_node,
14342 tree_cons (NULL_TREE,
14345 tree int_ftype_v8qi
14346 = build_function_type (integer_type_node,
14347 tree_cons (NULL_TREE, V8QI_type_node,
14349 tree int_ftype_v4hi
14350 = build_function_type (integer_type_node,
14351 tree_cons (NULL_TREE, V4HI_type_node,
14353 tree int_ftype_v2si
14354 = build_function_type (integer_type_node,
14355 tree_cons (NULL_TREE, V2SI_type_node,
14357 tree int_ftype_v8qi_int
14358 = build_function_type (integer_type_node,
14359 tree_cons (NULL_TREE, V8QI_type_node,
14360 tree_cons (NULL_TREE, integer_type_node,
14362 tree int_ftype_v4hi_int
14363 = build_function_type (integer_type_node,
14364 tree_cons (NULL_TREE, V4HI_type_node,
14365 tree_cons (NULL_TREE, integer_type_node,
14367 tree int_ftype_v2si_int
14368 = build_function_type (integer_type_node,
14369 tree_cons (NULL_TREE, V2SI_type_node,
14370 tree_cons (NULL_TREE, integer_type_node,
14372 tree v8qi_ftype_v8qi_int_int
14373 = build_function_type (V8QI_type_node,
14374 tree_cons (NULL_TREE, V8QI_type_node,
14375 tree_cons (NULL_TREE, integer_type_node,
14376 tree_cons (NULL_TREE,
14379 tree v4hi_ftype_v4hi_int_int
14380 = build_function_type (V4HI_type_node,
14381 tree_cons (NULL_TREE, V4HI_type_node,
14382 tree_cons (NULL_TREE, integer_type_node,
14383 tree_cons (NULL_TREE,
14386 tree v2si_ftype_v2si_int_int
14387 = build_function_type (V2SI_type_node,
14388 tree_cons (NULL_TREE, V2SI_type_node,
14389 tree_cons (NULL_TREE, integer_type_node,
14390 tree_cons (NULL_TREE,
14393 /* Miscellaneous. */
14394 tree v8qi_ftype_v4hi_v4hi
14395 = build_function_type (V8QI_type_node,
14396 tree_cons (NULL_TREE, V4HI_type_node,
14397 tree_cons (NULL_TREE, V4HI_type_node,
14399 tree v4hi_ftype_v2si_v2si
14400 = build_function_type (V4HI_type_node,
14401 tree_cons (NULL_TREE, V2SI_type_node,
14402 tree_cons (NULL_TREE, V2SI_type_node,
14404 tree v2si_ftype_v4hi_v4hi
14405 = build_function_type (V2SI_type_node,
14406 tree_cons (NULL_TREE, V4HI_type_node,
14407 tree_cons (NULL_TREE, V4HI_type_node,
14409 tree v2si_ftype_v8qi_v8qi
14410 = build_function_type (V2SI_type_node,
14411 tree_cons (NULL_TREE, V8QI_type_node,
14412 tree_cons (NULL_TREE, V8QI_type_node,
14414 tree v4hi_ftype_v4hi_di
14415 = build_function_type (V4HI_type_node,
14416 tree_cons (NULL_TREE, V4HI_type_node,
14417 tree_cons (NULL_TREE,
14418 long_long_integer_type_node,
14420 tree v2si_ftype_v2si_di
14421 = build_function_type (V2SI_type_node,
14422 tree_cons (NULL_TREE, V2SI_type_node,
14423 tree_cons (NULL_TREE,
14424 long_long_integer_type_node,
14426 tree void_ftype_int_int
14427 = build_function_type (void_type_node,
14428 tree_cons (NULL_TREE, integer_type_node,
14429 tree_cons (NULL_TREE, integer_type_node,
14432 = build_function_type (long_long_unsigned_type_node, endlink);
14434 = build_function_type (long_long_integer_type_node,
14435 tree_cons (NULL_TREE, V8QI_type_node,
14438 = build_function_type (long_long_integer_type_node,
14439 tree_cons (NULL_TREE, V4HI_type_node,
14442 = build_function_type (long_long_integer_type_node,
14443 tree_cons (NULL_TREE, V2SI_type_node,
14445 tree v2si_ftype_v4hi
14446 = build_function_type (V2SI_type_node,
14447 tree_cons (NULL_TREE, V4HI_type_node,
14449 tree v4hi_ftype_v8qi
14450 = build_function_type (V4HI_type_node,
14451 tree_cons (NULL_TREE, V8QI_type_node,
14454 tree di_ftype_di_v4hi_v4hi
14455 = build_function_type (long_long_unsigned_type_node,
14456 tree_cons (NULL_TREE,
14457 long_long_unsigned_type_node,
14458 tree_cons (NULL_TREE, V4HI_type_node,
14459 tree_cons (NULL_TREE,
14463 tree di_ftype_v4hi_v4hi
14464 = build_function_type (long_long_unsigned_type_node,
14465 tree_cons (NULL_TREE, V4HI_type_node,
14466 tree_cons (NULL_TREE, V4HI_type_node,
14469 /* Normal vector binops. */
14470 tree v8qi_ftype_v8qi_v8qi
14471 = build_function_type (V8QI_type_node,
14472 tree_cons (NULL_TREE, V8QI_type_node,
14473 tree_cons (NULL_TREE, V8QI_type_node,
14475 tree v4hi_ftype_v4hi_v4hi
14476 = build_function_type (V4HI_type_node,
14477 tree_cons (NULL_TREE, V4HI_type_node,
14478 tree_cons (NULL_TREE, V4HI_type_node,
14480 tree v2si_ftype_v2si_v2si
14481 = build_function_type (V2SI_type_node,
14482 tree_cons (NULL_TREE, V2SI_type_node,
14483 tree_cons (NULL_TREE, V2SI_type_node,
14485 tree di_ftype_di_di
14486 = build_function_type (long_long_unsigned_type_node,
14487 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14488 tree_cons (NULL_TREE,
14489 long_long_unsigned_type_node,
14492 /* Add all builtins that are more or less simple operations on two
14494 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14496 /* Use one of the operands; the target can have a different mode for
14497 mask-generating compares. */
14498 enum machine_mode mode;
14504 mode = insn_data[d->icode].operand[1].mode;
14509 type = v8qi_ftype_v8qi_v8qi;
14512 type = v4hi_ftype_v4hi_v4hi;
14515 type = v2si_ftype_v2si_v2si;
14518 type = di_ftype_di_di;
14522 gcc_unreachable ();
14525 def_mbuiltin (d->mask, d->name, type, d->code);
14528 /* Add the remaining MMX insns with somewhat more complicated types. */
14529 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14530 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14531 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14533 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14534 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14535 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14536 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14537 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14538 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14540 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14541 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14542 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14543 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14544 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14545 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14547 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14548 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14549 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14550 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14551 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14552 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14554 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14555 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14556 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14557 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14558 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14559 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14561 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14563 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14564 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14565 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14566 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14568 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14569 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14570 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14571 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14572 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14573 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14574 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14575 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14576 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14578 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14579 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14580 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14583 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14584 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14586 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14587 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14588 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14589 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14590 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14591 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14593 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14594 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14595 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14596 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14597 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14598 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14599 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14600 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14601 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14602 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14603 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14604 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14606 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14607 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14608 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14609 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14611 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14612 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14613 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14614 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14615 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14616 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14617 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14621 arm_init_tls_builtins (void)
14625 ftype = build_function_type (ptr_type_node, void_list_node);
14626 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
14627 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14629 TREE_NOTHROW (decl) = 1;
14630 TREE_READONLY (decl) = 1;
14647 } neon_builtin_type_bits;
14649 #define v8qi_UP T_V8QI
14650 #define v4hi_UP T_V4HI
14651 #define v2si_UP T_V2SI
14652 #define v2sf_UP T_V2SF
14654 #define v16qi_UP T_V16QI
14655 #define v8hi_UP T_V8HI
14656 #define v4si_UP T_V4SI
14657 #define v4sf_UP T_V4SF
14658 #define v2di_UP T_V2DI
14663 #define UP(X) X##_UP
14698 NEON_LOADSTRUCTLANE,
14700 NEON_STORESTRUCTLANE,
14709 const neon_itype itype;
14710 const neon_builtin_type_bits bits;
14711 const enum insn_code codes[T_MAX];
14712 const unsigned int num_vars;
14713 unsigned int base_fcode;
14714 } neon_builtin_datum;
14716 #define CF(N,X) CODE_FOR_neon_##N##X
14718 #define VAR1(T, N, A) \
14719 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14720 #define VAR2(T, N, A, B) \
14721 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14722 #define VAR3(T, N, A, B, C) \
14723 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14724 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14725 #define VAR4(T, N, A, B, C, D) \
14726 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14727 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14728 #define VAR5(T, N, A, B, C, D, E) \
14729 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14730 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14731 #define VAR6(T, N, A, B, C, D, E, F) \
14732 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14733 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14734 #define VAR7(T, N, A, B, C, D, E, F, G) \
14735 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14736 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14738 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14739 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14741 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14742 CF (N, G), CF (N, H) }, 8, 0
14743 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14744 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14745 | UP (H) | UP (I), \
14746 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14747 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14748 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14749 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14750 | UP (H) | UP (I) | UP (J), \
14751 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14752 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
14754 /* The mode entries in the following table correspond to the "key" type of the
14755 instruction variant, i.e. equivalent to that which would be specified after
14756 the assembler mnemonic, which usually refers to the last vector operand.
14757 (Signed/unsigned/polynomial types are not differentiated between though, and
14758 are all mapped onto the same mode for a given element size.) The modes
14759 listed per instruction should be the same as those defined for that
14760 instruction's pattern in neon.md.
14761 WARNING: Variants should be listed in the same increasing order as
14762 neon_builtin_type_bits. */
14764 static neon_builtin_datum neon_builtin_data[] =
14766 { VAR10 (BINOP, vadd,
14767 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14768 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14769 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14770 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14771 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14772 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14773 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14774 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14775 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14776 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14777 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14778 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14779 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14780 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14781 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14782 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14783 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14784 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14785 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14786 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14787 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14788 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14789 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14790 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14791 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14792 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14793 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14794 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14795 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14796 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14797 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14798 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14799 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14800 { VAR10 (BINOP, vsub,
14801 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14802 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14803 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14804 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14805 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14806 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14807 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14808 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14809 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14810 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14811 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14812 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14813 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14814 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14815 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14816 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14817 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14818 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14819 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14820 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14821 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14822 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14823 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14824 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14825 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14826 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14827 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14828 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14829 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14830 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14831 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14832 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14833 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14834 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14835 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14836 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14837 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14838 /* FIXME: vget_lane supports more variants than this! */
14839 { VAR10 (GETLANE, vget_lane,
14840 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14841 { VAR10 (SETLANE, vset_lane,
14842 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14843 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14844 { VAR10 (DUP, vdup_n,
14845 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14846 { VAR10 (DUPLANE, vdup_lane,
14847 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14848 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14849 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14850 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14851 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14852 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14853 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14854 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14855 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14856 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14857 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14858 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14859 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14860 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14861 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14862 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14863 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14864 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14865 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14866 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14867 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14868 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14869 { VAR10 (BINOP, vext,
14870 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14871 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14872 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14873 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14874 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14875 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14876 { VAR10 (SELECT, vbsl,
14877 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14878 { VAR1 (VTBL, vtbl1, v8qi) },
14879 { VAR1 (VTBL, vtbl2, v8qi) },
14880 { VAR1 (VTBL, vtbl3, v8qi) },
14881 { VAR1 (VTBL, vtbl4, v8qi) },
14882 { VAR1 (VTBX, vtbx1, v8qi) },
14883 { VAR1 (VTBX, vtbx2, v8qi) },
14884 { VAR1 (VTBX, vtbx3, v8qi) },
14885 { VAR1 (VTBX, vtbx4, v8qi) },
14886 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14887 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14888 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14889 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14890 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14891 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14892 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14893 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14894 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14895 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14896 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14897 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14898 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14899 { VAR10 (LOAD1, vld1,
14900 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14901 { VAR10 (LOAD1LANE, vld1_lane,
14902 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14903 { VAR10 (LOAD1, vld1_dup,
14904 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14905 { VAR10 (STORE1, vst1,
14906 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14907 { VAR10 (STORE1LANE, vst1_lane,
14908 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14909 { VAR9 (LOADSTRUCT,
14910 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14911 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14912 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14913 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14914 { VAR9 (STORESTRUCT, vst2,
14915 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14916 { VAR7 (STORESTRUCTLANE, vst2_lane,
14917 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14918 { VAR9 (LOADSTRUCT,
14919 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14920 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14921 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14922 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14923 { VAR9 (STORESTRUCT, vst3,
14924 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14925 { VAR7 (STORESTRUCTLANE, vst3_lane,
14926 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14927 { VAR9 (LOADSTRUCT, vld4,
14928 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14929 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14930 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14931 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14932 { VAR9 (STORESTRUCT, vst4,
14933 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14934 { VAR7 (STORESTRUCTLANE, vst4_lane,
14935 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14936 { VAR10 (LOGICBINOP, vand,
14937 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14938 { VAR10 (LOGICBINOP, vorr,
14939 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14940 { VAR10 (BINOP, veor,
14941 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14942 { VAR10 (LOGICBINOP, vbic,
14943 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14944 { VAR10 (LOGICBINOP, vorn,
14945 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14961 arm_init_neon_builtins (void)
14963 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14965 /* Create distinguished type nodes for NEON vector element types,
14966 and pointers to values of such types, so we can detect them later. */
14967 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14968 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14969 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14970 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14971 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
14972 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
14973 tree neon_float_type_node = make_node (REAL_TYPE);
14975 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
14976 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
14977 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
14978 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
14979 tree float_pointer_node = build_pointer_type (neon_float_type_node);
14981 /* Next create constant-qualified versions of the above types. */
14982 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
14984 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
14986 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
14988 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
14990 tree const_float_node = build_qualified_type (neon_float_type_node,
14993 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
14994 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
14995 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
14996 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
14997 tree const_float_pointer_node = build_pointer_type (const_float_node);
14999 /* Now create vector types based on our NEON element types. */
15000 /* 64-bit vectors. */
15001 tree V8QI_type_node =
15002 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15003 tree V4HI_type_node =
15004 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15005 tree V2SI_type_node =
15006 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15007 tree V2SF_type_node =
15008 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15009 /* 128-bit vectors. */
15010 tree V16QI_type_node =
15011 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15012 tree V8HI_type_node =
15013 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15014 tree V4SI_type_node =
15015 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15016 tree V4SF_type_node =
15017 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15018 tree V2DI_type_node =
15019 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15021 /* Unsigned integer types for various mode sizes. */
15022 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15023 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15024 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15025 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15027 /* Opaque integer types for structures of vectors. */
15028 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15029 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15030 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15031 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15033 /* Pointers to vector types. */
15034 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15035 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15036 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15037 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15038 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15039 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15040 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15041 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15042 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15044 /* Operations which return results as pairs. */
15045 tree void_ftype_pv8qi_v8qi_v8qi =
15046 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15047 V8QI_type_node, NULL);
15048 tree void_ftype_pv4hi_v4hi_v4hi =
15049 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15050 V4HI_type_node, NULL);
15051 tree void_ftype_pv2si_v2si_v2si =
15052 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15053 V2SI_type_node, NULL);
15054 tree void_ftype_pv2sf_v2sf_v2sf =
15055 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15056 V2SF_type_node, NULL);
15057 tree void_ftype_pdi_di_di =
15058 build_function_type_list (void_type_node, intDI_pointer_node,
15059 neon_intDI_type_node, neon_intDI_type_node, NULL);
15060 tree void_ftype_pv16qi_v16qi_v16qi =
15061 build_function_type_list (void_type_node, V16QI_pointer_node,
15062 V16QI_type_node, V16QI_type_node, NULL);
15063 tree void_ftype_pv8hi_v8hi_v8hi =
15064 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15065 V8HI_type_node, NULL);
15066 tree void_ftype_pv4si_v4si_v4si =
15067 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15068 V4SI_type_node, NULL);
15069 tree void_ftype_pv4sf_v4sf_v4sf =
15070 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15071 V4SF_type_node, NULL);
15072 tree void_ftype_pv2di_v2di_v2di =
15073 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15074 V2DI_type_node, NULL);
15076 tree reinterp_ftype_dreg[5][5];
15077 tree reinterp_ftype_qreg[5][5];
15078 tree dreg_types[5], qreg_types[5];
15080 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15081 layout_type (neon_float_type_node);
15083 /* Define typedefs which exactly correspond to the modes we are basing vector
15084 types on. If you change these names you'll need to change
15085 the table used by arm_mangle_type too. */
15086 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15087 "__builtin_neon_qi");
15088 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15089 "__builtin_neon_hi");
15090 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15091 "__builtin_neon_si");
15092 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15093 "__builtin_neon_sf");
15094 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15095 "__builtin_neon_di");
15097 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15098 "__builtin_neon_poly8");
15099 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15100 "__builtin_neon_poly16");
15101 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15102 "__builtin_neon_uqi");
15103 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15104 "__builtin_neon_uhi");
15105 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15106 "__builtin_neon_usi");
15107 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15108 "__builtin_neon_udi");
15110 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15111 "__builtin_neon_ti");
15112 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15113 "__builtin_neon_ei");
15114 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15115 "__builtin_neon_oi");
15116 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15117 "__builtin_neon_ci");
15118 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15119 "__builtin_neon_xi");
15121 dreg_types[0] = V8QI_type_node;
15122 dreg_types[1] = V4HI_type_node;
15123 dreg_types[2] = V2SI_type_node;
15124 dreg_types[3] = V2SF_type_node;
15125 dreg_types[4] = neon_intDI_type_node;
15127 qreg_types[0] = V16QI_type_node;
15128 qreg_types[1] = V8HI_type_node;
15129 qreg_types[2] = V4SI_type_node;
15130 qreg_types[3] = V4SF_type_node;
15131 qreg_types[4] = V2DI_type_node;
15133 for (i = 0; i < 5; i++)
15136 for (j = 0; j < 5; j++)
15138 reinterp_ftype_dreg[i][j]
15139 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15140 reinterp_ftype_qreg[i][j]
15141 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15145 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15147 neon_builtin_datum *d = &neon_builtin_data[i];
15148 unsigned int j, codeidx = 0;
15150 d->base_fcode = fcode;
15152 for (j = 0; j < T_MAX; j++)
15154 const char* const modenames[] = {
15155 "v8qi", "v4hi", "v2si", "v2sf", "di",
15156 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15160 enum insn_code icode;
15161 int is_load = 0, is_store = 0;
15163 if ((d->bits & (1 << j)) == 0)
15166 icode = d->codes[codeidx++];
15171 case NEON_LOAD1LANE:
15172 case NEON_LOADSTRUCT:
15173 case NEON_LOADSTRUCTLANE:
15175 /* Fall through. */
15177 case NEON_STORE1LANE:
15178 case NEON_STORESTRUCT:
15179 case NEON_STORESTRUCTLANE:
15182 /* Fall through. */
15185 case NEON_LOGICBINOP:
15186 case NEON_SHIFTINSERT:
15193 case NEON_SHIFTIMM:
15194 case NEON_SHIFTACC:
15200 case NEON_LANEMULL:
15201 case NEON_LANEMULH:
15203 case NEON_SCALARMUL:
15204 case NEON_SCALARMULL:
15205 case NEON_SCALARMULH:
15206 case NEON_SCALARMAC:
15212 tree return_type = void_type_node, args = void_list_node;
15214 /* Build a function type directly from the insn_data for this
15215 builtin. The build_function_type() function takes care of
15216 removing duplicates for us. */
15217 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15221 if (is_load && k == 1)
15223 /* Neon load patterns always have the memory operand
15224 (a SImode pointer) in the operand 1 position. We
15225 want a const pointer to the element type in that
15227 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15233 eltype = const_intQI_pointer_node;
15238 eltype = const_intHI_pointer_node;
15243 eltype = const_intSI_pointer_node;
15248 eltype = const_float_pointer_node;
15253 eltype = const_intDI_pointer_node;
15256 default: gcc_unreachable ();
15259 else if (is_store && k == 0)
15261 /* Similarly, Neon store patterns use operand 0 as
15262 the memory location to store to (a SImode pointer).
15263 Use a pointer to the element type of the store in
15265 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15271 eltype = intQI_pointer_node;
15276 eltype = intHI_pointer_node;
15281 eltype = intSI_pointer_node;
15286 eltype = float_pointer_node;
15291 eltype = intDI_pointer_node;
15294 default: gcc_unreachable ();
15299 switch (insn_data[icode].operand[k].mode)
15301 case VOIDmode: eltype = void_type_node; break;
15303 case QImode: eltype = neon_intQI_type_node; break;
15304 case HImode: eltype = neon_intHI_type_node; break;
15305 case SImode: eltype = neon_intSI_type_node; break;
15306 case SFmode: eltype = neon_float_type_node; break;
15307 case DImode: eltype = neon_intDI_type_node; break;
15308 case TImode: eltype = intTI_type_node; break;
15309 case EImode: eltype = intEI_type_node; break;
15310 case OImode: eltype = intOI_type_node; break;
15311 case CImode: eltype = intCI_type_node; break;
15312 case XImode: eltype = intXI_type_node; break;
15313 /* 64-bit vectors. */
15314 case V8QImode: eltype = V8QI_type_node; break;
15315 case V4HImode: eltype = V4HI_type_node; break;
15316 case V2SImode: eltype = V2SI_type_node; break;
15317 case V2SFmode: eltype = V2SF_type_node; break;
15318 /* 128-bit vectors. */
15319 case V16QImode: eltype = V16QI_type_node; break;
15320 case V8HImode: eltype = V8HI_type_node; break;
15321 case V4SImode: eltype = V4SI_type_node; break;
15322 case V4SFmode: eltype = V4SF_type_node; break;
15323 case V2DImode: eltype = V2DI_type_node; break;
15324 default: gcc_unreachable ();
15328 if (k == 0 && !is_store)
15329 return_type = eltype;
15331 args = tree_cons (NULL_TREE, eltype, args);
15334 ftype = build_function_type (return_type, args);
15338 case NEON_RESULTPAIR:
15340 switch (insn_data[icode].operand[1].mode)
15342 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15343 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15344 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15345 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15346 case DImode: ftype = void_ftype_pdi_di_di; break;
15347 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15348 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15349 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15350 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15351 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15352 default: gcc_unreachable ();
15357 case NEON_REINTERP:
15359 /* We iterate over 5 doubleword types, then 5 quadword
15362 switch (insn_data[icode].operand[0].mode)
15364 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15365 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15366 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15367 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15368 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15369 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15370 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15371 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15372 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15373 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15374 default: gcc_unreachable ();
15380 gcc_unreachable ();
15383 gcc_assert (ftype != NULL);
15385 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15387 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15394 arm_init_builtins (void)
15396 arm_init_tls_builtins ();
15398 if (TARGET_REALLY_IWMMXT)
15399 arm_init_iwmmxt_builtins ();
15402 arm_init_neon_builtins ();
15405 /* Errors in the source file can cause expand_expr to return const0_rtx
15406 where we expect a vector. To avoid crashing, use one of the vector
15407 clear instructions. */
15410 safe_vector_operand (rtx x, enum machine_mode mode)
15412 if (x != const0_rtx)
15414 x = gen_reg_rtx (mode);
15416 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15417 : gen_rtx_SUBREG (DImode, x, 0)));
15421 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15424 arm_expand_binop_builtin (enum insn_code icode,
15425 tree exp, rtx target)
15428 tree arg0 = CALL_EXPR_ARG (exp, 0);
15429 tree arg1 = CALL_EXPR_ARG (exp, 1);
15430 rtx op0 = expand_normal (arg0);
15431 rtx op1 = expand_normal (arg1);
15432 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15433 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15434 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15436 if (VECTOR_MODE_P (mode0))
15437 op0 = safe_vector_operand (op0, mode0);
15438 if (VECTOR_MODE_P (mode1))
15439 op1 = safe_vector_operand (op1, mode1);
15442 || GET_MODE (target) != tmode
15443 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15444 target = gen_reg_rtx (tmode);
15446 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15448 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15449 op0 = copy_to_mode_reg (mode0, op0);
15450 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15451 op1 = copy_to_mode_reg (mode1, op1);
15453 pat = GEN_FCN (icode) (target, op0, op1);
15460 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15463 arm_expand_unop_builtin (enum insn_code icode,
15464 tree exp, rtx target, int do_load)
15467 tree arg0 = CALL_EXPR_ARG (exp, 0);
15468 rtx op0 = expand_normal (arg0);
15469 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15470 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15473 || GET_MODE (target) != tmode
15474 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15475 target = gen_reg_rtx (tmode);
15477 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15480 if (VECTOR_MODE_P (mode0))
15481 op0 = safe_vector_operand (op0, mode0);
15483 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15484 op0 = copy_to_mode_reg (mode0, op0);
15487 pat = GEN_FCN (icode) (target, op0);
15495 neon_builtin_compare (const void *a, const void *b)
15497 const neon_builtin_datum *key = a;
15498 const neon_builtin_datum *memb = b;
15499 unsigned int soughtcode = key->base_fcode;
15501 if (soughtcode >= memb->base_fcode
15502 && soughtcode < memb->base_fcode + memb->num_vars)
15504 else if (soughtcode < memb->base_fcode)
15510 static enum insn_code
15511 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15513 neon_builtin_datum key, *found;
15516 key.base_fcode = fcode;
15517 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15518 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15519 gcc_assert (found);
15520 idx = fcode - (int) found->base_fcode;
15521 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15524 *itype = found->itype;
15526 return found->codes[idx];
15530 NEON_ARG_COPY_TO_REG,
15535 #define NEON_MAX_BUILTIN_ARGS 5
15537 /* Expand a Neon builtin. */
15539 arm_expand_neon_args (rtx target, int icode, int have_retval,
15544 tree arg[NEON_MAX_BUILTIN_ARGS];
15545 rtx op[NEON_MAX_BUILTIN_ARGS];
15546 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15547 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15552 || GET_MODE (target) != tmode
15553 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15554 target = gen_reg_rtx (tmode);
15556 va_start (ap, exp);
15560 builtin_arg thisarg = va_arg (ap, int);
15562 if (thisarg == NEON_ARG_STOP)
15566 arg[argc] = CALL_EXPR_ARG (exp, argc);
15567 op[argc] = expand_normal (arg[argc]);
15568 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15572 case NEON_ARG_COPY_TO_REG:
15573 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15574 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15575 (op[argc], mode[argc]))
15576 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15579 case NEON_ARG_CONSTANT:
15580 /* FIXME: This error message is somewhat unhelpful. */
15581 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15582 (op[argc], mode[argc]))
15583 error ("argument must be a constant");
15586 case NEON_ARG_STOP:
15587 gcc_unreachable ();
15600 pat = GEN_FCN (icode) (target, op[0]);
15604 pat = GEN_FCN (icode) (target, op[0], op[1]);
15608 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15612 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15616 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15620 gcc_unreachable ();
15626 pat = GEN_FCN (icode) (op[0]);
15630 pat = GEN_FCN (icode) (op[0], op[1]);
15634 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15638 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15642 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15646 gcc_unreachable ();
15657 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15658 constants defined per-instruction or per instruction-variant. Instead, the
15659 required info is looked up in the table neon_builtin_data. */
15661 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15664 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15671 return arm_expand_neon_args (target, icode, 1, exp,
15672 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15676 case NEON_SCALARMUL:
15677 case NEON_SCALARMULL:
15678 case NEON_SCALARMULH:
15679 case NEON_SHIFTINSERT:
15680 case NEON_LOGICBINOP:
15681 return arm_expand_neon_args (target, icode, 1, exp,
15682 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15686 return arm_expand_neon_args (target, icode, 1, exp,
15687 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15688 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15692 case NEON_SHIFTIMM:
15693 return arm_expand_neon_args (target, icode, 1, exp,
15694 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15698 return arm_expand_neon_args (target, icode, 1, exp,
15699 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15703 case NEON_REINTERP:
15704 return arm_expand_neon_args (target, icode, 1, exp,
15705 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15709 return arm_expand_neon_args (target, icode, 1, exp,
15710 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15712 case NEON_RESULTPAIR:
15713 return arm_expand_neon_args (target, icode, 0, exp,
15714 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15718 case NEON_LANEMULL:
15719 case NEON_LANEMULH:
15720 return arm_expand_neon_args (target, icode, 1, exp,
15721 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15722 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15725 return arm_expand_neon_args (target, icode, 1, exp,
15726 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15727 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15729 case NEON_SHIFTACC:
15730 return arm_expand_neon_args (target, icode, 1, exp,
15731 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15732 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15734 case NEON_SCALARMAC:
15735 return arm_expand_neon_args (target, icode, 1, exp,
15736 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15737 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15741 return arm_expand_neon_args (target, icode, 1, exp,
15742 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15746 case NEON_LOADSTRUCT:
15747 return arm_expand_neon_args (target, icode, 1, exp,
15748 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15750 case NEON_LOAD1LANE:
15751 case NEON_LOADSTRUCTLANE:
15752 return arm_expand_neon_args (target, icode, 1, exp,
15753 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15757 case NEON_STORESTRUCT:
15758 return arm_expand_neon_args (target, icode, 0, exp,
15759 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15761 case NEON_STORE1LANE:
15762 case NEON_STORESTRUCTLANE:
15763 return arm_expand_neon_args (target, icode, 0, exp,
15764 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15768 gcc_unreachable ();
15771 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15773 neon_reinterpret (rtx dest, rtx src)
15775 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15778 /* Emit code to place a Neon pair result in memory locations (with equal
15781 neon_emit_pair_result_insn (enum machine_mode mode,
15782 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15785 rtx mem = gen_rtx_MEM (mode, destaddr);
15786 rtx tmp1 = gen_reg_rtx (mode);
15787 rtx tmp2 = gen_reg_rtx (mode);
15789 emit_insn (intfn (tmp1, op1, tmp2, op2));
15791 emit_move_insn (mem, tmp1);
15792 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15793 emit_move_insn (mem, tmp2);
15796 /* Set up operands for a register copy from src to dest, taking care not to
15797 clobber registers in the process.
15798 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15799 be called with a large N, so that should be OK. */
15802 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15804 unsigned int copied = 0, opctr = 0;
15805 unsigned int done = (1 << count) - 1;
15808 while (copied != done)
15810 for (i = 0; i < count; i++)
15814 for (j = 0; good && j < count; j++)
15815 if (i != j && (copied & (1 << j)) == 0
15816 && reg_overlap_mentioned_p (src[j], dest[i]))
15821 operands[opctr++] = dest[i];
15822 operands[opctr++] = src[i];
15828 gcc_assert (opctr == count * 2);
15831 /* Expand an expression EXP that calls a built-in function,
15832 with result going to TARGET if that's convenient
15833 (and in mode MODE if that's convenient).
15834 SUBTARGET may be used as the target for computing one of EXP's operands.
15835 IGNORE is nonzero if the value is to be ignored. */
15838 arm_expand_builtin (tree exp,
15840 rtx subtarget ATTRIBUTE_UNUSED,
15841 enum machine_mode mode ATTRIBUTE_UNUSED,
15842 int ignore ATTRIBUTE_UNUSED)
15844 const struct builtin_description * d;
15845 enum insn_code icode;
15846 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15854 int fcode = DECL_FUNCTION_CODE (fndecl);
15856 enum machine_mode tmode;
15857 enum machine_mode mode0;
15858 enum machine_mode mode1;
15859 enum machine_mode mode2;
15861 if (fcode >= ARM_BUILTIN_NEON_BASE)
15862 return arm_expand_neon_builtin (fcode, exp, target);
15866 case ARM_BUILTIN_TEXTRMSB:
15867 case ARM_BUILTIN_TEXTRMUB:
15868 case ARM_BUILTIN_TEXTRMSH:
15869 case ARM_BUILTIN_TEXTRMUH:
15870 case ARM_BUILTIN_TEXTRMSW:
15871 case ARM_BUILTIN_TEXTRMUW:
15872 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15873 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15874 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15875 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15876 : CODE_FOR_iwmmxt_textrmw);
15878 arg0 = CALL_EXPR_ARG (exp, 0);
15879 arg1 = CALL_EXPR_ARG (exp, 1);
15880 op0 = expand_normal (arg0);
15881 op1 = expand_normal (arg1);
15882 tmode = insn_data[icode].operand[0].mode;
15883 mode0 = insn_data[icode].operand[1].mode;
15884 mode1 = insn_data[icode].operand[2].mode;
15886 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15887 op0 = copy_to_mode_reg (mode0, op0);
15888 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15890 /* @@@ better error message */
15891 error ("selector must be an immediate");
15892 return gen_reg_rtx (tmode);
15895 || GET_MODE (target) != tmode
15896 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15897 target = gen_reg_rtx (tmode);
15898 pat = GEN_FCN (icode) (target, op0, op1);
15904 case ARM_BUILTIN_TINSRB:
15905 case ARM_BUILTIN_TINSRH:
15906 case ARM_BUILTIN_TINSRW:
15907 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15908 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15909 : CODE_FOR_iwmmxt_tinsrw);
15910 arg0 = CALL_EXPR_ARG (exp, 0);
15911 arg1 = CALL_EXPR_ARG (exp, 1);
15912 arg2 = CALL_EXPR_ARG (exp, 2);
15913 op0 = expand_normal (arg0);
15914 op1 = expand_normal (arg1);
15915 op2 = expand_normal (arg2);
15916 tmode = insn_data[icode].operand[0].mode;
15917 mode0 = insn_data[icode].operand[1].mode;
15918 mode1 = insn_data[icode].operand[2].mode;
15919 mode2 = insn_data[icode].operand[3].mode;
15921 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15922 op0 = copy_to_mode_reg (mode0, op0);
15923 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15924 op1 = copy_to_mode_reg (mode1, op1);
15925 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15927 /* @@@ better error message */
15928 error ("selector must be an immediate");
15932 || GET_MODE (target) != tmode
15933 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15934 target = gen_reg_rtx (tmode);
15935 pat = GEN_FCN (icode) (target, op0, op1, op2);
15941 case ARM_BUILTIN_SETWCX:
15942 arg0 = CALL_EXPR_ARG (exp, 0);
15943 arg1 = CALL_EXPR_ARG (exp, 1);
15944 op0 = force_reg (SImode, expand_normal (arg0));
15945 op1 = expand_normal (arg1);
15946 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15949 case ARM_BUILTIN_GETWCX:
15950 arg0 = CALL_EXPR_ARG (exp, 0);
15951 op0 = expand_normal (arg0);
15952 target = gen_reg_rtx (SImode);
15953 emit_insn (gen_iwmmxt_tmrc (target, op0));
15956 case ARM_BUILTIN_WSHUFH:
15957 icode = CODE_FOR_iwmmxt_wshufh;
15958 arg0 = CALL_EXPR_ARG (exp, 0);
15959 arg1 = CALL_EXPR_ARG (exp, 1);
15960 op0 = expand_normal (arg0);
15961 op1 = expand_normal (arg1);
15962 tmode = insn_data[icode].operand[0].mode;
15963 mode1 = insn_data[icode].operand[1].mode;
15964 mode2 = insn_data[icode].operand[2].mode;
15966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15967 op0 = copy_to_mode_reg (mode1, op0);
15968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15970 /* @@@ better error message */
15971 error ("mask must be an immediate");
15975 || GET_MODE (target) != tmode
15976 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15977 target = gen_reg_rtx (tmode);
15978 pat = GEN_FCN (icode) (target, op0, op1);
15984 case ARM_BUILTIN_WSADB:
15985 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
15986 case ARM_BUILTIN_WSADH:
15987 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
15988 case ARM_BUILTIN_WSADBZ:
15989 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
15990 case ARM_BUILTIN_WSADHZ:
15991 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
15993 /* Several three-argument builtins. */
15994 case ARM_BUILTIN_WMACS:
15995 case ARM_BUILTIN_WMACU:
15996 case ARM_BUILTIN_WALIGN:
15997 case ARM_BUILTIN_TMIA:
15998 case ARM_BUILTIN_TMIAPH:
15999 case ARM_BUILTIN_TMIATT:
16000 case ARM_BUILTIN_TMIATB:
16001 case ARM_BUILTIN_TMIABT:
16002 case ARM_BUILTIN_TMIABB:
16003 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16004 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16005 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16006 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16007 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16008 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16009 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16010 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16011 : CODE_FOR_iwmmxt_walign);
16012 arg0 = CALL_EXPR_ARG (exp, 0);
16013 arg1 = CALL_EXPR_ARG (exp, 1);
16014 arg2 = CALL_EXPR_ARG (exp, 2);
16015 op0 = expand_normal (arg0);
16016 op1 = expand_normal (arg1);
16017 op2 = expand_normal (arg2);
16018 tmode = insn_data[icode].operand[0].mode;
16019 mode0 = insn_data[icode].operand[1].mode;
16020 mode1 = insn_data[icode].operand[2].mode;
16021 mode2 = insn_data[icode].operand[3].mode;
16023 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16024 op0 = copy_to_mode_reg (mode0, op0);
16025 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16026 op1 = copy_to_mode_reg (mode1, op1);
16027 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16028 op2 = copy_to_mode_reg (mode2, op2);
16030 || GET_MODE (target) != tmode
16031 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16032 target = gen_reg_rtx (tmode);
16033 pat = GEN_FCN (icode) (target, op0, op1, op2);
16039 case ARM_BUILTIN_WZERO:
16040 target = gen_reg_rtx (DImode);
16041 emit_insn (gen_iwmmxt_clrdi (target));
16044 case ARM_BUILTIN_THREAD_POINTER:
16045 return arm_load_tp (target);
16051 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16052 if (d->code == (const enum arm_builtins) fcode)
16053 return arm_expand_binop_builtin (d->icode, exp, target);
16055 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16056 if (d->code == (const enum arm_builtins) fcode)
16057 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16059 /* @@@ Should really do something sensible here. */
16063 /* Return the number (counting from 0) of
16064 the least significant set bit in MASK. */
16067 number_of_first_bit_set (unsigned mask)
16072 (mask & (1 << bit)) == 0;
16079 /* Emit code to push or pop registers to or from the stack. F is the
16080 assembly file. MASK is the registers to push or pop. PUSH is
16081 nonzero if we should push, and zero if we should pop. For debugging
16082 output, if pushing, adjust CFA_OFFSET by the amount of space added
16083 to the stack. REAL_REGS should have the same number of bits set as
16084 MASK, and will be used instead (in the same order) to describe which
16085 registers were saved - this is used to mark the save slots when we
16086 push high registers after moving them to low registers. */
16088 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16089 unsigned long real_regs)
16092 int lo_mask = mask & 0xFF;
16093 int pushed_words = 0;
16097 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16099 /* Special case. Do not generate a POP PC statement here, do it in
16101 thumb_exit (f, -1);
16105 if (ARM_EABI_UNWIND_TABLES && push)
16107 fprintf (f, "\t.save\t{");
16108 for (regno = 0; regno < 15; regno++)
16110 if (real_regs & (1 << regno))
16112 if (real_regs & ((1 << regno) -1))
16114 asm_fprintf (f, "%r", regno);
16117 fprintf (f, "}\n");
16120 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16122 /* Look at the low registers first. */
16123 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16127 asm_fprintf (f, "%r", regno);
16129 if ((lo_mask & ~1) != 0)
16136 if (push && (mask & (1 << LR_REGNUM)))
16138 /* Catch pushing the LR. */
16142 asm_fprintf (f, "%r", LR_REGNUM);
16146 else if (!push && (mask & (1 << PC_REGNUM)))
16148 /* Catch popping the PC. */
16149 if (TARGET_INTERWORK || TARGET_BACKTRACE
16150 || current_function_calls_eh_return)
16152 /* The PC is never poped directly, instead
16153 it is popped into r3 and then BX is used. */
16154 fprintf (f, "}\n");
16156 thumb_exit (f, -1);
16165 asm_fprintf (f, "%r", PC_REGNUM);
16169 fprintf (f, "}\n");
16171 if (push && pushed_words && dwarf2out_do_frame ())
16173 char *l = dwarf2out_cfi_label ();
16174 int pushed_mask = real_regs;
16176 *cfa_offset += pushed_words * 4;
16177 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16180 pushed_mask = real_regs;
16181 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16183 if (pushed_mask & 1)
16184 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16189 /* Generate code to return from a thumb function.
16190 If 'reg_containing_return_addr' is -1, then the return address is
16191 actually on the stack, at the stack pointer. */
16193 thumb_exit (FILE *f, int reg_containing_return_addr)
16195 unsigned regs_available_for_popping;
16196 unsigned regs_to_pop;
16198 unsigned available;
16202 int restore_a4 = FALSE;
16204 /* Compute the registers we need to pop. */
16208 if (reg_containing_return_addr == -1)
16210 regs_to_pop |= 1 << LR_REGNUM;
16214 if (TARGET_BACKTRACE)
16216 /* Restore the (ARM) frame pointer and stack pointer. */
16217 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16221 /* If there is nothing to pop then just emit the BX instruction and
16223 if (pops_needed == 0)
16225 if (current_function_calls_eh_return)
16226 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16228 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16231 /* Otherwise if we are not supporting interworking and we have not created
16232 a backtrace structure and the function was not entered in ARM mode then
16233 just pop the return address straight into the PC. */
16234 else if (!TARGET_INTERWORK
16235 && !TARGET_BACKTRACE
16236 && !is_called_in_ARM_mode (current_function_decl)
16237 && !current_function_calls_eh_return)
16239 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16243 /* Find out how many of the (return) argument registers we can corrupt. */
16244 regs_available_for_popping = 0;
16246 /* If returning via __builtin_eh_return, the bottom three registers
16247 all contain information needed for the return. */
16248 if (current_function_calls_eh_return)
16252 /* If we can deduce the registers used from the function's
16253 return value. This is more reliable that examining
16254 df_regs_ever_live_p () because that will be set if the register is
16255 ever used in the function, not just if the register is used
16256 to hold a return value. */
16258 if (current_function_return_rtx != 0)
16259 mode = GET_MODE (current_function_return_rtx);
16261 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16263 size = GET_MODE_SIZE (mode);
16267 /* In a void function we can use any argument register.
16268 In a function that returns a structure on the stack
16269 we can use the second and third argument registers. */
16270 if (mode == VOIDmode)
16271 regs_available_for_popping =
16272 (1 << ARG_REGISTER (1))
16273 | (1 << ARG_REGISTER (2))
16274 | (1 << ARG_REGISTER (3));
16276 regs_available_for_popping =
16277 (1 << ARG_REGISTER (2))
16278 | (1 << ARG_REGISTER (3));
16280 else if (size <= 4)
16281 regs_available_for_popping =
16282 (1 << ARG_REGISTER (2))
16283 | (1 << ARG_REGISTER (3));
16284 else if (size <= 8)
16285 regs_available_for_popping =
16286 (1 << ARG_REGISTER (3));
16289 /* Match registers to be popped with registers into which we pop them. */
16290 for (available = regs_available_for_popping,
16291 required = regs_to_pop;
16292 required != 0 && available != 0;
16293 available &= ~(available & - available),
16294 required &= ~(required & - required))
16297 /* If we have any popping registers left over, remove them. */
16299 regs_available_for_popping &= ~available;
16301 /* Otherwise if we need another popping register we can use
16302 the fourth argument register. */
16303 else if (pops_needed)
16305 /* If we have not found any free argument registers and
16306 reg a4 contains the return address, we must move it. */
16307 if (regs_available_for_popping == 0
16308 && reg_containing_return_addr == LAST_ARG_REGNUM)
16310 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16311 reg_containing_return_addr = LR_REGNUM;
16313 else if (size > 12)
16315 /* Register a4 is being used to hold part of the return value,
16316 but we have dire need of a free, low register. */
16319 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16322 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16324 /* The fourth argument register is available. */
16325 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16331 /* Pop as many registers as we can. */
16332 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16333 regs_available_for_popping);
16335 /* Process the registers we popped. */
16336 if (reg_containing_return_addr == -1)
16338 /* The return address was popped into the lowest numbered register. */
16339 regs_to_pop &= ~(1 << LR_REGNUM);
16341 reg_containing_return_addr =
16342 number_of_first_bit_set (regs_available_for_popping);
16344 /* Remove this register for the mask of available registers, so that
16345 the return address will not be corrupted by further pops. */
16346 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16349 /* If we popped other registers then handle them here. */
16350 if (regs_available_for_popping)
16354 /* Work out which register currently contains the frame pointer. */
16355 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16357 /* Move it into the correct place. */
16358 asm_fprintf (f, "\tmov\t%r, %r\n",
16359 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16361 /* (Temporarily) remove it from the mask of popped registers. */
16362 regs_available_for_popping &= ~(1 << frame_pointer);
16363 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16365 if (regs_available_for_popping)
16369 /* We popped the stack pointer as well,
16370 find the register that contains it. */
16371 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16373 /* Move it into the stack register. */
16374 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16376 /* At this point we have popped all necessary registers, so
16377 do not worry about restoring regs_available_for_popping
16378 to its correct value:
16380 assert (pops_needed == 0)
16381 assert (regs_available_for_popping == (1 << frame_pointer))
16382 assert (regs_to_pop == (1 << STACK_POINTER)) */
16386 /* Since we have just move the popped value into the frame
16387 pointer, the popping register is available for reuse, and
16388 we know that we still have the stack pointer left to pop. */
16389 regs_available_for_popping |= (1 << frame_pointer);
16393 /* If we still have registers left on the stack, but we no longer have
16394 any registers into which we can pop them, then we must move the return
16395 address into the link register and make available the register that
16397 if (regs_available_for_popping == 0 && pops_needed > 0)
16399 regs_available_for_popping |= 1 << reg_containing_return_addr;
16401 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16402 reg_containing_return_addr);
16404 reg_containing_return_addr = LR_REGNUM;
16407 /* If we have registers left on the stack then pop some more.
16408 We know that at most we will want to pop FP and SP. */
16409 if (pops_needed > 0)
16414 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16415 regs_available_for_popping);
16417 /* We have popped either FP or SP.
16418 Move whichever one it is into the correct register. */
16419 popped_into = number_of_first_bit_set (regs_available_for_popping);
16420 move_to = number_of_first_bit_set (regs_to_pop);
16422 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16424 regs_to_pop &= ~(1 << move_to);
16429 /* If we still have not popped everything then we must have only
16430 had one register available to us and we are now popping the SP. */
16431 if (pops_needed > 0)
16435 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16436 regs_available_for_popping);
16438 popped_into = number_of_first_bit_set (regs_available_for_popping);
16440 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16442 assert (regs_to_pop == (1 << STACK_POINTER))
16443 assert (pops_needed == 1)
16447 /* If necessary restore the a4 register. */
16450 if (reg_containing_return_addr != LR_REGNUM)
16452 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16453 reg_containing_return_addr = LR_REGNUM;
16456 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16459 if (current_function_calls_eh_return)
16460 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16462 /* Return to caller. */
16463 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16468 thumb1_final_prescan_insn (rtx insn)
16470 if (flag_print_asm_name)
16471 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16472 INSN_ADDRESSES (INSN_UID (insn)));
16476 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16478 unsigned HOST_WIDE_INT mask = 0xff;
16481 if (val == 0) /* XXX */
16484 for (i = 0; i < 25; i++)
16485 if ((val & (mask << i)) == val)
16491 /* Returns nonzero if the current function contains,
16492 or might contain a far jump. */
16494 thumb_far_jump_used_p (void)
16498 /* This test is only important for leaf functions. */
16499 /* assert (!leaf_function_p ()); */
16501 /* If we have already decided that far jumps may be used,
16502 do not bother checking again, and always return true even if
16503 it turns out that they are not being used. Once we have made
16504 the decision that far jumps are present (and that hence the link
16505 register will be pushed onto the stack) we cannot go back on it. */
16506 if (cfun->machine->far_jump_used)
16509 /* If this function is not being called from the prologue/epilogue
16510 generation code then it must be being called from the
16511 INITIAL_ELIMINATION_OFFSET macro. */
16512 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16514 /* In this case we know that we are being asked about the elimination
16515 of the arg pointer register. If that register is not being used,
16516 then there are no arguments on the stack, and we do not have to
16517 worry that a far jump might force the prologue to push the link
16518 register, changing the stack offsets. In this case we can just
16519 return false, since the presence of far jumps in the function will
16520 not affect stack offsets.
16522 If the arg pointer is live (or if it was live, but has now been
16523 eliminated and so set to dead) then we do have to test to see if
16524 the function might contain a far jump. This test can lead to some
16525 false negatives, since before reload is completed, then length of
16526 branch instructions is not known, so gcc defaults to returning their
16527 longest length, which in turn sets the far jump attribute to true.
16529 A false negative will not result in bad code being generated, but it
16530 will result in a needless push and pop of the link register. We
16531 hope that this does not occur too often.
16533 If we need doubleword stack alignment this could affect the other
16534 elimination offsets so we can't risk getting it wrong. */
16535 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16536 cfun->machine->arg_pointer_live = 1;
16537 else if (!cfun->machine->arg_pointer_live)
16541 /* Check to see if the function contains a branch
16542 insn with the far jump attribute set. */
16543 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16545 if (GET_CODE (insn) == JUMP_INSN
16546 /* Ignore tablejump patterns. */
16547 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16548 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16549 && get_attr_far_jump (insn) == FAR_JUMP_YES
16552 /* Record the fact that we have decided that
16553 the function does use far jumps. */
16554 cfun->machine->far_jump_used = 1;
16562 /* Return nonzero if FUNC must be entered in ARM mode. */
16564 is_called_in_ARM_mode (tree func)
16566 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16568 /* Ignore the problem about functions whose address is taken. */
16569 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16573 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16579 /* The bits which aren't usefully expanded as rtl. */
16581 thumb_unexpanded_epilogue (void)
16583 arm_stack_offsets *offsets;
16585 unsigned long live_regs_mask = 0;
16586 int high_regs_pushed = 0;
16587 int had_to_push_lr;
16590 if (return_used_this_function)
16593 if (IS_NAKED (arm_current_func_type ()))
16596 offsets = arm_get_frame_offsets ();
16597 live_regs_mask = offsets->saved_regs_mask;
16598 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16600 /* If we can deduce the registers used from the function's return value.
16601 This is more reliable that examining df_regs_ever_live_p () because that
16602 will be set if the register is ever used in the function, not just if
16603 the register is used to hold a return value. */
16604 size = arm_size_return_regs ();
16606 /* The prolog may have pushed some high registers to use as
16607 work registers. e.g. the testsuite file:
16608 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16609 compiles to produce:
16610 push {r4, r5, r6, r7, lr}
16614 as part of the prolog. We have to undo that pushing here. */
16616 if (high_regs_pushed)
16618 unsigned long mask = live_regs_mask & 0xff;
16621 /* The available low registers depend on the size of the value we are
16629 /* Oh dear! We have no low registers into which we can pop
16632 ("no low registers available for popping high registers");
16634 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16635 if (live_regs_mask & (1 << next_hi_reg))
16638 while (high_regs_pushed)
16640 /* Find lo register(s) into which the high register(s) can
16642 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16644 if (mask & (1 << regno))
16645 high_regs_pushed--;
16646 if (high_regs_pushed == 0)
16650 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16652 /* Pop the values into the low register(s). */
16653 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16655 /* Move the value(s) into the high registers. */
16656 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16658 if (mask & (1 << regno))
16660 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16663 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16664 if (live_regs_mask & (1 << next_hi_reg))
16669 live_regs_mask &= ~0x0f00;
16672 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16673 live_regs_mask &= 0xff;
16675 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
16677 /* Pop the return address into the PC. */
16678 if (had_to_push_lr)
16679 live_regs_mask |= 1 << PC_REGNUM;
16681 /* Either no argument registers were pushed or a backtrace
16682 structure was created which includes an adjusted stack
16683 pointer, so just pop everything. */
16684 if (live_regs_mask)
16685 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16688 /* We have either just popped the return address into the
16689 PC or it is was kept in LR for the entire function. */
16690 if (!had_to_push_lr)
16691 thumb_exit (asm_out_file, LR_REGNUM);
16695 /* Pop everything but the return address. */
16696 if (live_regs_mask)
16697 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16700 if (had_to_push_lr)
16704 /* We have no free low regs, so save one. */
16705 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16709 /* Get the return address into a temporary register. */
16710 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16711 1 << LAST_ARG_REGNUM);
16715 /* Move the return address to lr. */
16716 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16718 /* Restore the low register. */
16719 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16724 regno = LAST_ARG_REGNUM;
16729 /* Remove the argument registers that were pushed onto the stack. */
16730 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16731 SP_REGNUM, SP_REGNUM,
16732 current_function_pretend_args_size);
16734 thumb_exit (asm_out_file, regno);
16740 /* Functions to save and restore machine-specific function data. */
16741 static struct machine_function *
16742 arm_init_machine_status (void)
16744 struct machine_function *machine;
16745 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16747 #if ARM_FT_UNKNOWN != 0
16748 machine->func_type = ARM_FT_UNKNOWN;
16753 /* Return an RTX indicating where the return address to the
16754 calling function can be found. */
16756 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16761 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16764 /* Do anything needed before RTL is emitted for each function. */
16766 arm_init_expanders (void)
16768 /* Arrange to initialize and mark the machine per-function status. */
16769 init_machine_status = arm_init_machine_status;
16771 /* This is to stop the combine pass optimizing away the alignment
16772 adjustment of va_arg. */
16773 /* ??? It is claimed that this should not be necessary. */
16775 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16779 /* Like arm_compute_initial_elimination offset. Simpler because there
16780 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16781 to point at the base of the local variables after static stack
16782 space for a function has been allocated. */
16785 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16787 arm_stack_offsets *offsets;
16789 offsets = arm_get_frame_offsets ();
16793 case ARG_POINTER_REGNUM:
16796 case STACK_POINTER_REGNUM:
16797 return offsets->outgoing_args - offsets->saved_args;
16799 case FRAME_POINTER_REGNUM:
16800 return offsets->soft_frame - offsets->saved_args;
16802 case ARM_HARD_FRAME_POINTER_REGNUM:
16803 return offsets->saved_regs - offsets->saved_args;
16805 case THUMB_HARD_FRAME_POINTER_REGNUM:
16806 return offsets->locals_base - offsets->saved_args;
16809 gcc_unreachable ();
16813 case FRAME_POINTER_REGNUM:
16816 case STACK_POINTER_REGNUM:
16817 return offsets->outgoing_args - offsets->soft_frame;
16819 case ARM_HARD_FRAME_POINTER_REGNUM:
16820 return offsets->saved_regs - offsets->soft_frame;
16822 case THUMB_HARD_FRAME_POINTER_REGNUM:
16823 return offsets->locals_base - offsets->soft_frame;
16826 gcc_unreachable ();
16831 gcc_unreachable ();
16835 /* Generate the rest of a function's prologue. */
16837 thumb1_expand_prologue (void)
16841 HOST_WIDE_INT amount;
16842 arm_stack_offsets *offsets;
16843 unsigned long func_type;
16845 unsigned long live_regs_mask;
16847 func_type = arm_current_func_type ();
16849 /* Naked functions don't have prologues. */
16850 if (IS_NAKED (func_type))
16853 if (IS_INTERRUPT (func_type))
16855 error ("interrupt Service Routines cannot be coded in Thumb mode");
16859 offsets = arm_get_frame_offsets ();
16860 live_regs_mask = offsets->saved_regs_mask;
16861 /* Load the pic register before setting the frame pointer,
16862 so we can use r7 as a temporary work register. */
16863 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16864 arm_load_pic_register (live_regs_mask);
16866 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16867 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16868 stack_pointer_rtx);
16870 amount = offsets->outgoing_args - offsets->saved_regs;
16875 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16876 GEN_INT (- amount)));
16877 RTX_FRAME_RELATED_P (insn) = 1;
16883 /* The stack decrement is too big for an immediate value in a single
16884 insn. In theory we could issue multiple subtracts, but after
16885 three of them it becomes more space efficient to place the full
16886 value in the constant pool and load into a register. (Also the
16887 ARM debugger really likes to see only one stack decrement per
16888 function). So instead we look for a scratch register into which
16889 we can load the decrement, and then we subtract this from the
16890 stack pointer. Unfortunately on the thumb the only available
16891 scratch registers are the argument registers, and we cannot use
16892 these as they may hold arguments to the function. Instead we
16893 attempt to locate a call preserved register which is used by this
16894 function. If we can find one, then we know that it will have
16895 been pushed at the start of the prologue and so we can corrupt
16897 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16898 if (live_regs_mask & (1 << regno)
16899 && !(frame_pointer_needed
16900 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16903 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16905 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16907 /* Choose an arbitrary, non-argument low register. */
16908 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16910 /* Save it by copying it into a high, scratch register. */
16911 emit_insn (gen_movsi (spare, reg));
16912 /* Add a USE to stop propagate_one_insn() from barfing. */
16913 emit_insn (gen_prologue_use (spare));
16915 /* Decrement the stack. */
16916 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16917 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16918 stack_pointer_rtx, reg));
16919 RTX_FRAME_RELATED_P (insn) = 1;
16920 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16921 plus_constant (stack_pointer_rtx,
16923 RTX_FRAME_RELATED_P (dwarf) = 1;
16925 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16928 /* Restore the low register's original value. */
16929 emit_insn (gen_movsi (reg, spare));
16931 /* Emit a USE of the restored scratch register, so that flow
16932 analysis will not consider the restore redundant. The
16933 register won't be used again in this function and isn't
16934 restored by the epilogue. */
16935 emit_insn (gen_prologue_use (reg));
16939 reg = gen_rtx_REG (SImode, regno);
16941 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16943 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16944 stack_pointer_rtx, reg));
16945 RTX_FRAME_RELATED_P (insn) = 1;
16946 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16947 plus_constant (stack_pointer_rtx,
16949 RTX_FRAME_RELATED_P (dwarf) = 1;
16951 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16957 if (frame_pointer_needed)
16958 thumb_set_frame_pointer (offsets);
16960 /* If we are profiling, make sure no instructions are scheduled before
16961 the call to mcount. Similarly if the user has requested no
16962 scheduling in the prolog. Similarly if we want non-call exceptions
16963 using the EABI unwinder, to prevent faulting instructions from being
16964 swapped with a stack adjustment. */
16965 if (current_function_profile || !TARGET_SCHED_PROLOG
16966 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
16967 emit_insn (gen_blockage ());
16969 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
16970 if (live_regs_mask & 0xff)
16971 cfun->machine->lr_save_eliminated = 0;
16976 thumb1_expand_epilogue (void)
16978 HOST_WIDE_INT amount;
16979 arm_stack_offsets *offsets;
16982 /* Naked functions don't have prologues. */
16983 if (IS_NAKED (arm_current_func_type ()))
16986 offsets = arm_get_frame_offsets ();
16987 amount = offsets->outgoing_args - offsets->saved_regs;
16989 if (frame_pointer_needed)
16991 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
16992 amount = offsets->locals_base - offsets->saved_regs;
16995 gcc_assert (amount >= 0);
16999 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17000 GEN_INT (amount)));
17003 /* r3 is always free in the epilogue. */
17004 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17006 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17007 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17011 /* Emit a USE (stack_pointer_rtx), so that
17012 the stack adjustment will not be deleted. */
17013 emit_insn (gen_prologue_use (stack_pointer_rtx));
17015 if (current_function_profile || !TARGET_SCHED_PROLOG)
17016 emit_insn (gen_blockage ());
17018 /* Emit a clobber for each insn that will be restored in the epilogue,
17019 so that flow2 will get register lifetimes correct. */
17020 for (regno = 0; regno < 13; regno++)
17021 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17022 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
17024 if (! df_regs_ever_live_p (LR_REGNUM))
17025 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
17029 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17031 arm_stack_offsets *offsets;
17032 unsigned long live_regs_mask = 0;
17033 unsigned long l_mask;
17034 unsigned high_regs_pushed = 0;
17035 int cfa_offset = 0;
17038 if (IS_NAKED (arm_current_func_type ()))
17041 if (is_called_in_ARM_mode (current_function_decl))
17045 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17046 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17048 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17050 /* Generate code sequence to switch us into Thumb mode. */
17051 /* The .code 32 directive has already been emitted by
17052 ASM_DECLARE_FUNCTION_NAME. */
17053 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17054 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17056 /* Generate a label, so that the debugger will notice the
17057 change in instruction sets. This label is also used by
17058 the assembler to bypass the ARM code when this function
17059 is called from a Thumb encoded function elsewhere in the
17060 same file. Hence the definition of STUB_NAME here must
17061 agree with the definition in gas/config/tc-arm.c. */
17063 #define STUB_NAME ".real_start_of"
17065 fprintf (f, "\t.code\t16\n");
17067 if (arm_dllexport_name_p (name))
17068 name = arm_strip_name_encoding (name);
17070 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17071 fprintf (f, "\t.thumb_func\n");
17072 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17075 if (current_function_pretend_args_size)
17077 /* Output unwind directive for the stack adjustment. */
17078 if (ARM_EABI_UNWIND_TABLES)
17079 fprintf (f, "\t.pad #%d\n",
17080 current_function_pretend_args_size);
17082 if (cfun->machine->uses_anonymous_args)
17086 fprintf (f, "\tpush\t{");
17088 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
17090 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17091 regno <= LAST_ARG_REGNUM;
17093 asm_fprintf (f, "%r%s", regno,
17094 regno == LAST_ARG_REGNUM ? "" : ", ");
17096 fprintf (f, "}\n");
17099 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17100 SP_REGNUM, SP_REGNUM,
17101 current_function_pretend_args_size);
17103 /* We don't need to record the stores for unwinding (would it
17104 help the debugger any if we did?), but record the change in
17105 the stack pointer. */
17106 if (dwarf2out_do_frame ())
17108 char *l = dwarf2out_cfi_label ();
17110 cfa_offset = cfa_offset + current_function_pretend_args_size;
17111 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17115 /* Get the registers we are going to push. */
17116 offsets = arm_get_frame_offsets ();
17117 live_regs_mask = offsets->saved_regs_mask;
17118 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17119 l_mask = live_regs_mask & 0x40ff;
17120 /* Then count how many other high registers will need to be pushed. */
17121 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17123 if (TARGET_BACKTRACE)
17126 unsigned work_register;
17128 /* We have been asked to create a stack backtrace structure.
17129 The code looks like this:
17133 0 sub SP, #16 Reserve space for 4 registers.
17134 2 push {R7} Push low registers.
17135 4 add R7, SP, #20 Get the stack pointer before the push.
17136 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17137 8 mov R7, PC Get hold of the start of this code plus 12.
17138 10 str R7, [SP, #16] Store it.
17139 12 mov R7, FP Get hold of the current frame pointer.
17140 14 str R7, [SP, #4] Store it.
17141 16 mov R7, LR Get hold of the current return address.
17142 18 str R7, [SP, #12] Store it.
17143 20 add R7, SP, #16 Point at the start of the backtrace structure.
17144 22 mov FP, R7 Put this value into the frame pointer. */
17146 work_register = thumb_find_work_register (live_regs_mask);
17148 if (ARM_EABI_UNWIND_TABLES)
17149 asm_fprintf (f, "\t.pad #16\n");
17152 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17153 SP_REGNUM, SP_REGNUM);
17155 if (dwarf2out_do_frame ())
17157 char *l = dwarf2out_cfi_label ();
17159 cfa_offset = cfa_offset + 16;
17160 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17165 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17166 offset = bit_count (l_mask) * UNITS_PER_WORD;
17171 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17172 offset + 16 + current_function_pretend_args_size);
17174 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17177 /* Make sure that the instruction fetching the PC is in the right place
17178 to calculate "start of backtrace creation code + 12". */
17181 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17182 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17184 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17185 ARM_HARD_FRAME_POINTER_REGNUM);
17186 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17191 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17192 ARM_HARD_FRAME_POINTER_REGNUM);
17193 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17195 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17196 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17200 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17201 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17203 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17205 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17206 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17208 /* Optimization: If we are not pushing any low registers but we are going
17209 to push some high registers then delay our first push. This will just
17210 be a push of LR and we can combine it with the push of the first high
17212 else if ((l_mask & 0xff) != 0
17213 || (high_regs_pushed == 0 && l_mask))
17214 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17216 if (high_regs_pushed)
17218 unsigned pushable_regs;
17219 unsigned next_hi_reg;
17221 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17222 if (live_regs_mask & (1 << next_hi_reg))
17225 pushable_regs = l_mask & 0xff;
17227 if (pushable_regs == 0)
17228 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17230 while (high_regs_pushed > 0)
17232 unsigned long real_regs_mask = 0;
17234 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17236 if (pushable_regs & (1 << regno))
17238 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17240 high_regs_pushed --;
17241 real_regs_mask |= (1 << next_hi_reg);
17243 if (high_regs_pushed)
17245 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17247 if (live_regs_mask & (1 << next_hi_reg))
17252 pushable_regs &= ~((1 << regno) - 1);
17258 /* If we had to find a work register and we have not yet
17259 saved the LR then add it to the list of regs to push. */
17260 if (l_mask == (1 << LR_REGNUM))
17262 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17264 real_regs_mask | (1 << LR_REGNUM));
17268 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17273 /* Handle the case of a double word load into a low register from
17274 a computed memory address. The computed address may involve a
17275 register which is overwritten by the load. */
17277 thumb_load_double_from_address (rtx *operands)
17285 gcc_assert (GET_CODE (operands[0]) == REG);
17286 gcc_assert (GET_CODE (operands[1]) == MEM);
17288 /* Get the memory address. */
17289 addr = XEXP (operands[1], 0);
17291 /* Work out how the memory address is computed. */
17292 switch (GET_CODE (addr))
17295 operands[2] = adjust_address (operands[1], SImode, 4);
17297 if (REGNO (operands[0]) == REGNO (addr))
17299 output_asm_insn ("ldr\t%H0, %2", operands);
17300 output_asm_insn ("ldr\t%0, %1", operands);
17304 output_asm_insn ("ldr\t%0, %1", operands);
17305 output_asm_insn ("ldr\t%H0, %2", operands);
17310 /* Compute <address> + 4 for the high order load. */
17311 operands[2] = adjust_address (operands[1], SImode, 4);
17313 output_asm_insn ("ldr\t%0, %1", operands);
17314 output_asm_insn ("ldr\t%H0, %2", operands);
17318 arg1 = XEXP (addr, 0);
17319 arg2 = XEXP (addr, 1);
17321 if (CONSTANT_P (arg1))
17322 base = arg2, offset = arg1;
17324 base = arg1, offset = arg2;
17326 gcc_assert (GET_CODE (base) == REG);
17328 /* Catch the case of <address> = <reg> + <reg> */
17329 if (GET_CODE (offset) == REG)
17331 int reg_offset = REGNO (offset);
17332 int reg_base = REGNO (base);
17333 int reg_dest = REGNO (operands[0]);
17335 /* Add the base and offset registers together into the
17336 higher destination register. */
17337 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17338 reg_dest + 1, reg_base, reg_offset);
17340 /* Load the lower destination register from the address in
17341 the higher destination register. */
17342 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17343 reg_dest, reg_dest + 1);
17345 /* Load the higher destination register from its own address
17347 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17348 reg_dest + 1, reg_dest + 1);
17352 /* Compute <address> + 4 for the high order load. */
17353 operands[2] = adjust_address (operands[1], SImode, 4);
17355 /* If the computed address is held in the low order register
17356 then load the high order register first, otherwise always
17357 load the low order register first. */
17358 if (REGNO (operands[0]) == REGNO (base))
17360 output_asm_insn ("ldr\t%H0, %2", operands);
17361 output_asm_insn ("ldr\t%0, %1", operands);
17365 output_asm_insn ("ldr\t%0, %1", operands);
17366 output_asm_insn ("ldr\t%H0, %2", operands);
17372 /* With no registers to worry about we can just load the value
17374 operands[2] = adjust_address (operands[1], SImode, 4);
17376 output_asm_insn ("ldr\t%H0, %2", operands);
17377 output_asm_insn ("ldr\t%0, %1", operands);
17381 gcc_unreachable ();
17388 thumb_output_move_mem_multiple (int n, rtx *operands)
17395 if (REGNO (operands[4]) > REGNO (operands[5]))
17398 operands[4] = operands[5];
17401 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17402 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17406 if (REGNO (operands[4]) > REGNO (operands[5]))
17409 operands[4] = operands[5];
17412 if (REGNO (operands[5]) > REGNO (operands[6]))
17415 operands[5] = operands[6];
17418 if (REGNO (operands[4]) > REGNO (operands[5]))
17421 operands[4] = operands[5];
17425 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17426 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17430 gcc_unreachable ();
17436 /* Output a call-via instruction for thumb state. */
17438 thumb_call_via_reg (rtx reg)
17440 int regno = REGNO (reg);
17443 gcc_assert (regno < LR_REGNUM);
17445 /* If we are in the normal text section we can use a single instance
17446 per compilation unit. If we are doing function sections, then we need
17447 an entry per section, since we can't rely on reachability. */
17448 if (in_section == text_section)
17450 thumb_call_reg_needed = 1;
17452 if (thumb_call_via_label[regno] == NULL)
17453 thumb_call_via_label[regno] = gen_label_rtx ();
17454 labelp = thumb_call_via_label + regno;
17458 if (cfun->machine->call_via[regno] == NULL)
17459 cfun->machine->call_via[regno] = gen_label_rtx ();
17460 labelp = cfun->machine->call_via + regno;
17463 output_asm_insn ("bl\t%a0", labelp);
17467 /* Routines for generating rtl. */
17469 thumb_expand_movmemqi (rtx *operands)
17471 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17472 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17473 HOST_WIDE_INT len = INTVAL (operands[2]);
17474 HOST_WIDE_INT offset = 0;
17478 emit_insn (gen_movmem12b (out, in, out, in));
17484 emit_insn (gen_movmem8b (out, in, out, in));
17490 rtx reg = gen_reg_rtx (SImode);
17491 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17492 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17499 rtx reg = gen_reg_rtx (HImode);
17500 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17501 plus_constant (in, offset))));
17502 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17510 rtx reg = gen_reg_rtx (QImode);
17511 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17512 plus_constant (in, offset))));
17513 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17519 thumb_reload_out_hi (rtx *operands)
17521 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17524 /* Handle reading a half-word from memory during reload. */
17526 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17528 gcc_unreachable ();
17531 /* Return the length of a function name prefix
17532 that starts with the character 'c'. */
17534 arm_get_strip_length (int c)
17538 ARM_NAME_ENCODING_LENGTHS
17543 /* Return a pointer to a function's name with any
17544 and all prefix encodings stripped from it. */
17546 arm_strip_name_encoding (const char *name)
17550 while ((skip = arm_get_strip_length (* name)))
17556 /* If there is a '*' anywhere in the name's prefix, then
17557 emit the stripped name verbatim, otherwise prepend an
17558 underscore if leading underscores are being used. */
17560 arm_asm_output_labelref (FILE *stream, const char *name)
17565 while ((skip = arm_get_strip_length (* name)))
17567 verbatim |= (*name == '*');
17572 fputs (name, stream);
17574 asm_fprintf (stream, "%U%s", name);
17578 arm_file_start (void)
17582 if (TARGET_UNIFIED_ASM)
17583 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17587 const char *fpu_name;
17588 if (arm_select[0].string)
17589 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17590 else if (arm_select[1].string)
17591 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17593 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17594 all_cores[arm_default_cpu].name);
17596 if (TARGET_SOFT_FLOAT)
17599 fpu_name = "softvfp";
17601 fpu_name = "softfpa";
17605 int set_float_abi_attributes = 0;
17606 switch (arm_fpu_arch)
17611 case FPUTYPE_FPA_EMU2:
17614 case FPUTYPE_FPA_EMU3:
17617 case FPUTYPE_MAVERICK:
17618 fpu_name = "maverick";
17622 set_float_abi_attributes = 1;
17626 set_float_abi_attributes = 1;
17630 set_float_abi_attributes = 1;
17635 if (set_float_abi_attributes)
17637 if (TARGET_HARD_FLOAT)
17638 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17639 if (TARGET_HARD_FLOAT_ABI)
17640 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17643 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17645 /* Some of these attributes only apply when the corresponding features
17646 are used. However we don't have any easy way of figuring this out.
17647 Conservatively record the setting that would have been used. */
17649 /* Tag_ABI_FP_rounding. */
17650 if (flag_rounding_math)
17651 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17652 if (!flag_unsafe_math_optimizations)
17654 /* Tag_ABI_FP_denomal. */
17655 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17656 /* Tag_ABI_FP_exceptions. */
17657 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17659 /* Tag_ABI_FP_user_exceptions. */
17660 if (flag_signaling_nans)
17661 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17662 /* Tag_ABI_FP_number_model. */
17663 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17664 flag_finite_math_only ? 1 : 3);
17666 /* Tag_ABI_align8_needed. */
17667 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17668 /* Tag_ABI_align8_preserved. */
17669 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17670 /* Tag_ABI_enum_size. */
17671 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17672 flag_short_enums ? 1 : 2);
17674 /* Tag_ABI_optimization_goals. */
17677 else if (optimize >= 2)
17683 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17685 if (arm_lang_output_object_attributes_hook)
17686 arm_lang_output_object_attributes_hook();
17688 default_file_start();
17692 arm_file_end (void)
17696 if (NEED_INDICATE_EXEC_STACK)
17697 /* Add .note.GNU-stack. */
17698 file_end_indicate_exec_stack ();
17700 if (! thumb_call_reg_needed)
17703 switch_to_section (text_section);
17704 asm_fprintf (asm_out_file, "\t.code 16\n");
17705 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17707 for (regno = 0; regno < LR_REGNUM; regno++)
17709 rtx label = thumb_call_via_label[regno];
17713 targetm.asm_out.internal_label (asm_out_file, "L",
17714 CODE_LABEL_NUMBER (label));
17715 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17721 /* Symbols in the text segment can be accessed without indirecting via the
17722 constant pool; it may take an extra binary operation, but this is still
17723 faster than indirecting via memory. Don't do this when not optimizing,
17724 since we won't be calculating al of the offsets necessary to do this
17728 arm_encode_section_info (tree decl, rtx rtl, int first)
17730 if (optimize > 0 && TREE_CONSTANT (decl))
17731 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17733 default_encode_section_info (decl, rtl, first);
17735 #endif /* !ARM_PE */
17738 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17740 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17741 && !strcmp (prefix, "L"))
17743 arm_ccfsm_state = 0;
17744 arm_target_insn = NULL;
17746 default_internal_label (stream, prefix, labelno);
17749 /* Output code to add DELTA to the first argument, and then jump
17750 to FUNCTION. Used for C++ multiple inheritance. */
17752 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17753 HOST_WIDE_INT delta,
17754 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17757 static int thunk_label = 0;
17760 int mi_delta = delta;
17761 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17763 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17766 mi_delta = - mi_delta;
17770 int labelno = thunk_label++;
17771 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17772 /* Thunks are entered in arm mode when avaiable. */
17773 if (TARGET_THUMB1_ONLY)
17775 /* push r3 so we can use it as a temporary. */
17776 /* TODO: Omit this save if r3 is not used. */
17777 fputs ("\tpush {r3}\n", file);
17778 fputs ("\tldr\tr3, ", file);
17782 fputs ("\tldr\tr12, ", file);
17784 assemble_name (file, label);
17785 fputc ('\n', file);
17788 /* If we are generating PIC, the ldr instruction below loads
17789 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17790 the address of the add + 8, so we have:
17792 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17795 Note that we have "+ 1" because some versions of GNU ld
17796 don't set the low bit of the result for R_ARM_REL32
17797 relocations against thumb function symbols.
17798 On ARMv6M this is +4, not +8. */
17799 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17800 assemble_name (file, labelpc);
17801 fputs (":\n", file);
17802 if (TARGET_THUMB1_ONLY)
17804 /* This is 2 insns after the start of the thunk, so we know it
17805 is 4-byte aligned. */
17806 fputs ("\tadd\tr3, pc, r3\n", file);
17807 fputs ("\tmov r12, r3\n", file);
17810 fputs ("\tadd\tr12, pc, r12\n", file);
17812 else if (TARGET_THUMB1_ONLY)
17813 fputs ("\tmov r12, r3\n", file);
17815 if (TARGET_THUMB1_ONLY)
17817 if (mi_delta > 255)
17819 fputs ("\tldr\tr3, ", file);
17820 assemble_name (file, label);
17821 fputs ("+4\n", file);
17822 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
17823 mi_op, this_regno, this_regno);
17825 else if (mi_delta != 0)
17827 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17828 mi_op, this_regno, this_regno,
17834 /* TODO: Use movw/movt for large constants when available. */
17835 while (mi_delta != 0)
17837 if ((mi_delta & (3 << shift)) == 0)
17841 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17842 mi_op, this_regno, this_regno,
17843 mi_delta & (0xff << shift));
17844 mi_delta &= ~(0xff << shift);
17851 if (TARGET_THUMB1_ONLY)
17852 fputs ("\tpop\t{r3}\n", file);
17854 fprintf (file, "\tbx\tr12\n");
17855 ASM_OUTPUT_ALIGN (file, 2);
17856 assemble_name (file, label);
17857 fputs (":\n", file);
17860 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17861 rtx tem = XEXP (DECL_RTL (function), 0);
17862 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17863 tem = gen_rtx_MINUS (GET_MODE (tem),
17865 gen_rtx_SYMBOL_REF (Pmode,
17866 ggc_strdup (labelpc)));
17867 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17870 /* Output ".word .LTHUNKn". */
17871 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17873 if (TARGET_THUMB1_ONLY && mi_delta > 255)
17874 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
17878 fputs ("\tb\t", file);
17879 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17880 if (NEED_PLT_RELOC)
17881 fputs ("(PLT)", file);
17882 fputc ('\n', file);
17887 arm_emit_vector_const (FILE *file, rtx x)
17890 const char * pattern;
17892 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17894 switch (GET_MODE (x))
17896 case V2SImode: pattern = "%08x"; break;
17897 case V4HImode: pattern = "%04x"; break;
17898 case V8QImode: pattern = "%02x"; break;
17899 default: gcc_unreachable ();
17902 fprintf (file, "0x");
17903 for (i = CONST_VECTOR_NUNITS (x); i--;)
17907 element = CONST_VECTOR_ELT (x, i);
17908 fprintf (file, pattern, INTVAL (element));
17915 arm_output_load_gr (rtx *operands)
17922 if (GET_CODE (operands [1]) != MEM
17923 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
17924 || GET_CODE (reg = XEXP (sum, 0)) != REG
17925 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
17926 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
17927 return "wldrw%?\t%0, %1";
17929 /* Fix up an out-of-range load of a GR register. */
17930 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
17931 wcgr = operands[0];
17933 output_asm_insn ("ldr%?\t%0, %1", operands);
17935 operands[0] = wcgr;
17937 output_asm_insn ("tmcr%?\t%0, %1", operands);
17938 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
17943 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
17945 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
17946 named arg and all anonymous args onto the stack.
17947 XXX I know the prologue shouldn't be pushing registers, but it is faster
17951 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
17952 enum machine_mode mode,
17955 int second_time ATTRIBUTE_UNUSED)
17957 int nregs = cum->nregs;
17959 && ARM_DOUBLEWORD_ALIGN
17960 && arm_needs_doubleword_align (mode, type))
17963 cfun->machine->uses_anonymous_args = 1;
17964 if (nregs < NUM_ARG_REGS)
17965 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
17968 /* Return nonzero if the CONSUMER instruction (a store) does not need
17969 PRODUCER's value to calculate the address. */
17972 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
17974 rtx value = PATTERN (producer);
17975 rtx addr = PATTERN (consumer);
17977 if (GET_CODE (value) == COND_EXEC)
17978 value = COND_EXEC_CODE (value);
17979 if (GET_CODE (value) == PARALLEL)
17980 value = XVECEXP (value, 0, 0);
17981 value = XEXP (value, 0);
17982 if (GET_CODE (addr) == COND_EXEC)
17983 addr = COND_EXEC_CODE (addr);
17984 if (GET_CODE (addr) == PARALLEL)
17985 addr = XVECEXP (addr, 0, 0);
17986 addr = XEXP (addr, 0);
17988 return !reg_overlap_mentioned_p (value, addr);
17991 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
17992 have an early register shift value or amount dependency on the
17993 result of PRODUCER. */
17996 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
17998 rtx value = PATTERN (producer);
17999 rtx op = PATTERN (consumer);
18002 if (GET_CODE (value) == COND_EXEC)
18003 value = COND_EXEC_CODE (value);
18004 if (GET_CODE (value) == PARALLEL)
18005 value = XVECEXP (value, 0, 0);
18006 value = XEXP (value, 0);
18007 if (GET_CODE (op) == COND_EXEC)
18008 op = COND_EXEC_CODE (op);
18009 if (GET_CODE (op) == PARALLEL)
18010 op = XVECEXP (op, 0, 0);
18013 early_op = XEXP (op, 0);
18014 /* This is either an actual independent shift, or a shift applied to
18015 the first operand of another operation. We want the whole shift
18017 if (GET_CODE (early_op) == REG)
18020 return !reg_overlap_mentioned_p (value, early_op);
18023 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18024 have an early register shift value dependency on the result of
18028 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18030 rtx value = PATTERN (producer);
18031 rtx op = PATTERN (consumer);
18034 if (GET_CODE (value) == COND_EXEC)
18035 value = COND_EXEC_CODE (value);
18036 if (GET_CODE (value) == PARALLEL)
18037 value = XVECEXP (value, 0, 0);
18038 value = XEXP (value, 0);
18039 if (GET_CODE (op) == COND_EXEC)
18040 op = COND_EXEC_CODE (op);
18041 if (GET_CODE (op) == PARALLEL)
18042 op = XVECEXP (op, 0, 0);
18045 early_op = XEXP (op, 0);
18047 /* This is either an actual independent shift, or a shift applied to
18048 the first operand of another operation. We want the value being
18049 shifted, in either case. */
18050 if (GET_CODE (early_op) != REG)
18051 early_op = XEXP (early_op, 0);
18053 return !reg_overlap_mentioned_p (value, early_op);
18056 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18057 have an early register mult dependency on the result of
18061 arm_no_early_mul_dep (rtx producer, rtx consumer)
18063 rtx value = PATTERN (producer);
18064 rtx op = PATTERN (consumer);
18066 if (GET_CODE (value) == COND_EXEC)
18067 value = COND_EXEC_CODE (value);
18068 if (GET_CODE (value) == PARALLEL)
18069 value = XVECEXP (value, 0, 0);
18070 value = XEXP (value, 0);
18071 if (GET_CODE (op) == COND_EXEC)
18072 op = COND_EXEC_CODE (op);
18073 if (GET_CODE (op) == PARALLEL)
18074 op = XVECEXP (op, 0, 0);
18077 return (GET_CODE (op) == PLUS
18078 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
18081 /* We can't rely on the caller doing the proper promotion when
18082 using APCS or ATPCS. */
18085 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18087 return !TARGET_AAPCS_BASED;
18091 /* AAPCS based ABIs use short enums by default. */
18094 arm_default_short_enums (void)
18096 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18100 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18103 arm_align_anon_bitfield (void)
18105 return TARGET_AAPCS_BASED;
18109 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18112 arm_cxx_guard_type (void)
18114 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18117 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18118 has an accumulator dependency on the result of the producer (a
18119 multiplication instruction) and no other dependency on that result. */
18121 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18123 rtx mul = PATTERN (producer);
18124 rtx mac = PATTERN (consumer);
18126 rtx mac_op0, mac_op1, mac_acc;
18128 if (GET_CODE (mul) == COND_EXEC)
18129 mul = COND_EXEC_CODE (mul);
18130 if (GET_CODE (mac) == COND_EXEC)
18131 mac = COND_EXEC_CODE (mac);
18133 /* Check that mul is of the form (set (...) (mult ...))
18134 and mla is of the form (set (...) (plus (mult ...) (...))). */
18135 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18136 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18137 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18140 mul_result = XEXP (mul, 0);
18141 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18142 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18143 mac_acc = XEXP (XEXP (mac, 1), 1);
18145 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18146 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18147 && !reg_overlap_mentioned_p (mul_result, mac_op1));
18151 /* The EABI says test the least significant bit of a guard variable. */
18154 arm_cxx_guard_mask_bit (void)
18156 return TARGET_AAPCS_BASED;
18160 /* The EABI specifies that all array cookies are 8 bytes long. */
18163 arm_get_cookie_size (tree type)
18167 if (!TARGET_AAPCS_BASED)
18168 return default_cxx_get_cookie_size (type);
18170 size = build_int_cst (sizetype, 8);
18175 /* The EABI says that array cookies should also contain the element size. */
18178 arm_cookie_has_size (void)
18180 return TARGET_AAPCS_BASED;
18184 /* The EABI says constructors and destructors should return a pointer to
18185 the object constructed/destroyed. */
18188 arm_cxx_cdtor_returns_this (void)
18190 return TARGET_AAPCS_BASED;
18193 /* The EABI says that an inline function may never be the key
18197 arm_cxx_key_method_may_be_inline (void)
18199 return !TARGET_AAPCS_BASED;
18203 arm_cxx_determine_class_data_visibility (tree decl)
18205 if (!TARGET_AAPCS_BASED)
18208 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18209 is exported. However, on systems without dynamic vague linkage,
18210 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18211 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18212 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18214 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18215 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18219 arm_cxx_class_data_always_comdat (void)
18221 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18222 vague linkage if the class has no key function. */
18223 return !TARGET_AAPCS_BASED;
18227 /* The EABI says __aeabi_atexit should be used to register static
18231 arm_cxx_use_aeabi_atexit (void)
18233 return TARGET_AAPCS_BASED;
18238 arm_set_return_address (rtx source, rtx scratch)
18240 arm_stack_offsets *offsets;
18241 HOST_WIDE_INT delta;
18243 unsigned long saved_regs;
18245 offsets = arm_get_frame_offsets ();
18246 saved_regs = offsets->saved_regs_mask;
18248 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18249 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18252 if (frame_pointer_needed)
18253 addr = plus_constant(hard_frame_pointer_rtx, -4);
18256 /* LR will be the first saved register. */
18257 delta = offsets->outgoing_args - (offsets->frame + 4);
18262 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18263 GEN_INT (delta & ~4095)));
18268 addr = stack_pointer_rtx;
18270 addr = plus_constant (addr, delta);
18272 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18278 thumb_set_return_address (rtx source, rtx scratch)
18280 arm_stack_offsets *offsets;
18281 HOST_WIDE_INT delta;
18282 HOST_WIDE_INT limit;
18285 unsigned long mask;
18287 emit_insn (gen_rtx_USE (VOIDmode, source));
18289 offsets = arm_get_frame_offsets ();
18290 mask = offsets->saved_regs_mask;
18291 if (mask & (1 << LR_REGNUM))
18294 /* Find the saved regs. */
18295 if (frame_pointer_needed)
18297 delta = offsets->soft_frame - offsets->saved_args;
18298 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18304 delta = offsets->outgoing_args - offsets->saved_args;
18307 /* Allow for the stack frame. */
18308 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18310 /* The link register is always the first saved register. */
18313 /* Construct the address. */
18314 addr = gen_rtx_REG (SImode, reg);
18317 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18318 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18322 addr = plus_constant (addr, delta);
18324 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18327 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18330 /* Implements target hook vector_mode_supported_p. */
18332 arm_vector_mode_supported_p (enum machine_mode mode)
18334 /* Neon also supports V2SImode, etc. listed in the clause below. */
18335 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18336 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18339 if ((mode == V2SImode)
18340 || (mode == V4HImode)
18341 || (mode == V8QImode))
18347 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18348 ARM insns and therefore guarantee that the shift count is modulo 256.
18349 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18350 guarantee no particular behavior for out-of-range counts. */
18352 static unsigned HOST_WIDE_INT
18353 arm_shift_truncation_mask (enum machine_mode mode)
18355 return mode == SImode ? 255 : 0;
18359 /* Map internal gcc register numbers to DWARF2 register numbers. */
18362 arm_dbx_register_number (unsigned int regno)
18367 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18368 compatibility. The EABI defines them as registers 96-103. */
18369 if (IS_FPA_REGNUM (regno))
18370 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18372 /* FIXME: VFPv3 register numbering. */
18373 if (IS_VFP_REGNUM (regno))
18374 return 64 + regno - FIRST_VFP_REGNUM;
18376 if (IS_IWMMXT_GR_REGNUM (regno))
18377 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18379 if (IS_IWMMXT_REGNUM (regno))
18380 return 112 + regno - FIRST_IWMMXT_REGNUM;
18382 gcc_unreachable ();
18386 #ifdef TARGET_UNWIND_INFO
18387 /* Emit unwind directives for a store-multiple instruction or stack pointer
18388 push during alignment.
18389 These should only ever be generated by the function prologue code, so
18390 expect them to have a particular form. */
18393 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18396 HOST_WIDE_INT offset;
18397 HOST_WIDE_INT nregs;
18403 e = XVECEXP (p, 0, 0);
18404 if (GET_CODE (e) != SET)
18407 /* First insn will adjust the stack pointer. */
18408 if (GET_CODE (e) != SET
18409 || GET_CODE (XEXP (e, 0)) != REG
18410 || REGNO (XEXP (e, 0)) != SP_REGNUM
18411 || GET_CODE (XEXP (e, 1)) != PLUS)
18414 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18415 nregs = XVECLEN (p, 0) - 1;
18417 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18420 /* The function prologue may also push pc, but not annotate it as it is
18421 never restored. We turn this into a stack pointer adjustment. */
18422 if (nregs * 4 == offset - 4)
18424 fprintf (asm_out_file, "\t.pad #4\n");
18428 fprintf (asm_out_file, "\t.save {");
18430 else if (IS_VFP_REGNUM (reg))
18433 fprintf (asm_out_file, "\t.vsave {");
18435 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18437 /* FPA registers are done differently. */
18438 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18442 /* Unknown register type. */
18445 /* If the stack increment doesn't match the size of the saved registers,
18446 something has gone horribly wrong. */
18447 if (offset != nregs * reg_size)
18452 /* The remaining insns will describe the stores. */
18453 for (i = 1; i <= nregs; i++)
18455 /* Expect (set (mem <addr>) (reg)).
18456 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18457 e = XVECEXP (p, 0, i);
18458 if (GET_CODE (e) != SET
18459 || GET_CODE (XEXP (e, 0)) != MEM
18460 || GET_CODE (XEXP (e, 1)) != REG)
18463 reg = REGNO (XEXP (e, 1));
18468 fprintf (asm_out_file, ", ");
18469 /* We can't use %r for vfp because we need to use the
18470 double precision register names. */
18471 if (IS_VFP_REGNUM (reg))
18472 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18474 asm_fprintf (asm_out_file, "%r", reg);
18476 #ifdef ENABLE_CHECKING
18477 /* Check that the addresses are consecutive. */
18478 e = XEXP (XEXP (e, 0), 0);
18479 if (GET_CODE (e) == PLUS)
18481 offset += reg_size;
18482 if (GET_CODE (XEXP (e, 0)) != REG
18483 || REGNO (XEXP (e, 0)) != SP_REGNUM
18484 || GET_CODE (XEXP (e, 1)) != CONST_INT
18485 || offset != INTVAL (XEXP (e, 1)))
18489 || GET_CODE (e) != REG
18490 || REGNO (e) != SP_REGNUM)
18494 fprintf (asm_out_file, "}\n");
18497 /* Emit unwind directives for a SET. */
18500 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18508 switch (GET_CODE (e0))
18511 /* Pushing a single register. */
18512 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18513 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18514 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18517 asm_fprintf (asm_out_file, "\t.save ");
18518 if (IS_VFP_REGNUM (REGNO (e1)))
18519 asm_fprintf(asm_out_file, "{d%d}\n",
18520 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18522 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
18526 if (REGNO (e0) == SP_REGNUM)
18528 /* A stack increment. */
18529 if (GET_CODE (e1) != PLUS
18530 || GET_CODE (XEXP (e1, 0)) != REG
18531 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18532 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18535 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18536 -INTVAL (XEXP (e1, 1)));
18538 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18540 HOST_WIDE_INT offset;
18542 if (GET_CODE (e1) == PLUS)
18544 if (GET_CODE (XEXP (e1, 0)) != REG
18545 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18547 reg = REGNO (XEXP (e1, 0));
18548 offset = INTVAL (XEXP (e1, 1));
18549 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18550 HARD_FRAME_POINTER_REGNUM, reg,
18551 INTVAL (XEXP (e1, 1)));
18553 else if (GET_CODE (e1) == REG)
18556 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18557 HARD_FRAME_POINTER_REGNUM, reg);
18562 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18564 /* Move from sp to reg. */
18565 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18567 else if (GET_CODE (e1) == PLUS
18568 && GET_CODE (XEXP (e1, 0)) == REG
18569 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18570 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18572 /* Set reg to offset from sp. */
18573 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18574 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
18576 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18578 /* Stack pointer save before alignment. */
18580 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18593 /* Emit unwind directives for the given insn. */
18596 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18600 if (!ARM_EABI_UNWIND_TABLES)
18603 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18606 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18608 pat = XEXP (pat, 0);
18610 pat = PATTERN (insn);
18612 switch (GET_CODE (pat))
18615 arm_unwind_emit_set (asm_out_file, pat);
18619 /* Store multiple. */
18620 arm_unwind_emit_sequence (asm_out_file, pat);
18629 /* Output a reference from a function exception table to the type_info
18630 object X. The EABI specifies that the symbol should be relocated by
18631 an R_ARM_TARGET2 relocation. */
18634 arm_output_ttype (rtx x)
18636 fputs ("\t.word\t", asm_out_file);
18637 output_addr_const (asm_out_file, x);
18638 /* Use special relocations for symbol references. */
18639 if (GET_CODE (x) != CONST_INT)
18640 fputs ("(TARGET2)", asm_out_file);
18641 fputc ('\n', asm_out_file);
18645 #endif /* TARGET_UNWIND_INFO */
18648 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18649 stack alignment. */
18652 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18654 rtx unspec = SET_SRC (pattern);
18655 gcc_assert (GET_CODE (unspec) == UNSPEC);
18659 case UNSPEC_STACK_ALIGN:
18660 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18661 put anything on the stack, so hopefully it won't matter.
18662 CFA = SP will be correct after alignment. */
18663 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18664 SET_DEST (pattern));
18667 gcc_unreachable ();
18672 /* Output unwind directives for the start/end of a function. */
18675 arm_output_fn_unwind (FILE * f, bool prologue)
18677 if (!ARM_EABI_UNWIND_TABLES)
18681 fputs ("\t.fnstart\n", f);
18683 fputs ("\t.fnend\n", f);
18687 arm_emit_tls_decoration (FILE *fp, rtx x)
18689 enum tls_reloc reloc;
18692 val = XVECEXP (x, 0, 0);
18693 reloc = INTVAL (XVECEXP (x, 0, 1));
18695 output_addr_const (fp, val);
18700 fputs ("(tlsgd)", fp);
18703 fputs ("(tlsldm)", fp);
18706 fputs ("(tlsldo)", fp);
18709 fputs ("(gottpoff)", fp);
18712 fputs ("(tpoff)", fp);
18715 gcc_unreachable ();
18723 fputs (" + (. - ", fp);
18724 output_addr_const (fp, XVECEXP (x, 0, 2));
18726 output_addr_const (fp, XVECEXP (x, 0, 3));
18736 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18739 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18741 gcc_assert (size == 4);
18742 fputs ("\t.word\t", file);
18743 output_addr_const (file, x);
18744 fputs ("(tlsldo)", file);
18748 arm_output_addr_const_extra (FILE *fp, rtx x)
18750 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18751 return arm_emit_tls_decoration (fp, x);
18752 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18755 int labelno = INTVAL (XVECEXP (x, 0, 0));
18757 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18758 assemble_name_raw (fp, label);
18762 else if (GET_CODE (x) == CONST_VECTOR)
18763 return arm_emit_vector_const (fp, x);
18768 /* Output assembly for a shift instruction.
18769 SET_FLAGS determines how the instruction modifies the condition codes.
18770 0 - Do not set condition codes.
18771 1 - Set condition codes.
18772 2 - Use smallest instruction. */
18774 arm_output_shift(rtx * operands, int set_flags)
18777 static const char flag_chars[3] = {'?', '.', '!'};
18782 c = flag_chars[set_flags];
18783 if (TARGET_UNIFIED_ASM)
18785 shift = shift_op(operands[3], &val);
18789 operands[2] = GEN_INT(val);
18790 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18793 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18796 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18797 output_asm_insn (pattern, operands);
18801 /* Output a Thumb-2 casesi instruction. */
18803 thumb2_output_casesi (rtx *operands)
18805 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18807 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18809 output_asm_insn ("cmp\t%0, %1", operands);
18810 output_asm_insn ("bhi\t%l3", operands);
18811 switch (GET_MODE(diff_vec))
18814 return "tbb\t[%|pc, %0]";
18816 return "tbh\t[%|pc, %0, lsl #1]";
18820 output_asm_insn ("adr\t%4, %l2", operands);
18821 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18822 output_asm_insn ("add\t%4, %4, %5", operands);
18827 output_asm_insn ("adr\t%4, %l2", operands);
18828 return "ldr\t%|pc, [%4, %0, lsl #2]";
18831 gcc_unreachable ();
18835 /* Most ARM cores are single issue, but some newer ones can dual issue.
18836 The scheduler descriptions rely on this being correct. */
18838 arm_issue_rate (void)
18851 /* A table and a function to perform ARM-specific name mangling for
18852 NEON vector types in order to conform to the AAPCS (see "Procedure
18853 Call Standard for the ARM Architecture", Appendix A). To qualify
18854 for emission with the mangled names defined in that document, a
18855 vector type must not only be of the correct mode but also be
18856 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18859 enum machine_mode mode;
18860 const char *element_type_name;
18861 const char *aapcs_name;
18862 } arm_mangle_map_entry;
18864 static arm_mangle_map_entry arm_mangle_map[] = {
18865 /* 64-bit containerized types. */
18866 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18867 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18868 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18869 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18870 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18871 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18872 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18873 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18874 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18875 /* 128-bit containerized types. */
18876 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18877 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18878 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18879 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18880 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18881 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18882 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18883 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18884 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18885 { VOIDmode, NULL, NULL }
18889 arm_mangle_type (const_tree type)
18891 arm_mangle_map_entry *pos = arm_mangle_map;
18893 if (TREE_CODE (type) != VECTOR_TYPE)
18896 /* Check the mode of the vector type, and the name of the vector
18897 element type, against the table. */
18898 while (pos->mode != VOIDmode)
18900 tree elt_type = TREE_TYPE (type);
18902 if (pos->mode == TYPE_MODE (type)
18903 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18904 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18905 pos->element_type_name))
18906 return pos->aapcs_name;
18911 /* Use the default mangling for unrecognized (possibly user-defined)
18916 #include "gt-arm.h"