/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "df.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

const struct attribute_spec arm_attribute_table[];

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool arm_size_rtx_costs (rtx, int, int, int *);
static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
#undef  TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef  TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
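/* (4088 + 1 + 4095 = 8184 = 8 * 1023, hence the divisibility by eight
   claimed above.)  */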

#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* Define the information needed to generate branch insns.  This is
   stored from the compare operation.  */
rtx arm_compare_op0, arm_compare_op1;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by the command line.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
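
/* As a worked example of the chaining above, FL_FOR_ARCH5TE expands to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
   | FL_THUMB.  */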

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 when a return insn is output, this means that the epilogue
   is not needed.  */
int return_used_this_function;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
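/* With the usual Thumb register numbering (hard frame pointer in r7; SP,
   PC and the PIC register outside the low eight registers) this leaves
   r0-r6 available as Thumb-2 work registers.  */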

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, int, int, int *);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string       name            processors  */
  { NULL,       "-mcpu=",       all_cores  },
  { NULL,       "-march=",      all_architectures },
  { NULL,       "-mtune=",      all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};

/* Available values for -mfpu=.  */
static const struct fpu_desc all_fpus[] =
{
  {"fpa",       FPUTYPE_FPA},
  {"fpe2",      FPUTYPE_FPA_EMU2},
  {"fpe3",      FPUTYPE_FPA_EMU3},
  {"maverick",  FPUTYPE_MAVERICK},
  {"vfp",       FPUTYPE_VFP},
  {"vfp3",      FPUTYPE_VFP3},
  {"vfpv3",     FPUTYPE_VFP3},
  {"vfpv3-d16", FPUTYPE_VFP3D16},
  {"neon",      FPUTYPE_NEON}
};

/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum fputype fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,         /* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,        /* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3D16  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3  */
  ARM_FP_MODEL_VFP              /* FPUTYPE_NEON  */
};

struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",      ARM_FLOAT_ABI_SOFT},
  {"softfp",    ARM_FLOAT_ABI_SOFTFP},
  {"hard",      ARM_FLOAT_ABI_HARD}
};

struct abi_name
{
  const char * name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
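
/* Illustrative use: a caller can emit a single stack adjustment with
     emit_set_insn (stack_pointer_rtx,
		    plus_constant (stack_pointer_rtx, -8));  */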

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
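
/* For example, value == 0x29 (binary 101001) drops one set bit per
   iteration: 101001 -> 101000 -> 100000 -> 0, so bit_count returns 3.  */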

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
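
  /* For example, a call to __aeabi_idivmod leaves the quotient in r0 and
     the remainder in r1; code that only wants the quotient simply ignores
     r1.  */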

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}

static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);

	  if (value > 0)
	    columns = value;
	}

      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }
  printf ("\n");
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
	{
	  const struct processors * sel;

	  for (sel = ptr->processors; sel->name != NULL; sel++)
	    if (streq (ptr->string, sel->name))
	      {
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i == ARM_OPT_SET_CPU)
		  selected_cpu = (enum processor_type) (sel - ptr->processors);

		if (i != ARM_OPT_SET_TUNE)
		  {
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
			       ptr->string);

		    insn_flags = sel->flags;
		  }

		break;
	      }

	  if (sel->name == NULL)
	    error ("bad value (%s) for %s switch", ptr->string, ptr->name);
	}
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  selected_cpu = SUBTARGET_CPU_DEFAULT;
#endif
	  /* Default to ARM6.  */
	  if (selected_cpu == arm_none)
	    selected_cpu = arm6;
	}
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  insn_flags = sel->flags;
	}

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
	arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     chosen.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
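      /* (248 + 1 + 4095 = 4344 = 8 * 543.)  */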
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }

  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
	{
	  if (streq (all_fpus[i].name, target_fpu_name))
	    {
	      arm_fpu_arch = all_fpus[i].fpu;
	      arm_fpu_tune = arm_fpu_arch;
	      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
	      break;
	    }
	}
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
	error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
	arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
	arm_fpu_arch = FPUTYPE_MAVERICK;
      else
	arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
	arm_fpu_tune = FPUTYPE_FPA;
      else
	arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");
1426 /* For arm2/3 there is no need to do any scheduling if there is only
1427 a floating point emulator, or we are doing software floating-point. */
1428 if ((TARGET_SOFT_FLOAT
1429 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1430 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1431 && (tune_flags & FL_MODE32) == 0)
1432 flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
	target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
	target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
	target_thread_pointer = TP_CP15;
      else
	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
	arm_structure_size_boundary = size;
      else
	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (selected_cpu == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* ??? We might want scheduling for thumb2.  */
  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
	 2 cycles to load a constant, and the load scheduler may well
	 reduce that to 1.  */
      if (arm_ld_sched)
	arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
	 to achieve a good schedule, so it's faster to synthesize
	 constants that can be done in two insns.  */
      if (arm_tune_xscale)
	arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
	 that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
	max_insns_skipped = 3;
    }

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};

/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *              arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}

/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (GET_CODE (sibling) == CALL_INSN);

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */
const_ok_for_arm (HOST_WIDE_INT i)
  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)

  /* Get the number of trailing zeros.  */
  lowbit = ffs ((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)

  /* Allow rotated constants in ARM mode.  */
      && ((i & ~0xc000003f) == 0
          || (i & ~0xf000000f) == 0
          || (i & ~0xfc000003) == 0))

  /* Allow repeated pattern.  */
  if (i == v || i == (v | (v << 8)))
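
#if 0
/* Editorial sketch, not part of the compiler: the ARM-mode checks above
   amount to "an 8-bit value rotated right by an even amount".  A direct,
   self-contained restatement (function name hypothetical):  */
static int
sketch_is_arm_immediate (unsigned int i)
{
  int rot;

  /* If some even left-rotation of I fits in 8 bits, the constant is
     encodable, e.g. 0xff, 0xff000000, or the wrap-around 0xf000000f.  */
  for (rot = 0; rot < 32; rot += 2)
    {
      unsigned int rotated = rot ? ((i << rot) | (i >> (32 - rot))) : i;
      if ((rotated & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
#endif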
/* Return true if I is a valid constant for the operation CODE.  */
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
  if (const_ok_for_arm (i))

      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb  */

      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */
/* ??? Tweak this for thumb2.  */
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));

  if (subtargets || code == SET
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
          && REGNO (target) != REGNO (source)))
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!after_arm_reorg
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
              > arm_constant_limit + (code != SET)))

          /* Currently SET is the only monadic value for CODE, all
             the rest are dyadic.  */
          if (TARGET_USE_MOVT)
            arm_emit_movpair (target, GEN_INT (val));
            emit_set_insn (target, GEN_INT (val));

          rtx temp = subtargets ? gen_reg_rtx (mode) : target;

          if (TARGET_USE_MOVT)
            arm_emit_movpair (temp, GEN_INT (val));
            emit_set_insn (temp, GEN_INT (val));

          /* For MINUS, the value is subtracted from, since we never
             have subtraction of a constant.  */
            emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
            emit_set_insn (target,
                           gen_rtx_fmt_ee (code, mode, source, temp));

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
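
/* Editorial example (hypothetical, for illustration only): with CODE == SET
   and VAL == 0x12345678, which no single MOV/MVN immediate can encode, the
   in-line sequence on pre-MOVW cores is built from rotated 8-bit chunks:

       mov  rD, #0x12000000
       orr  rD, rD, #0x00340000
       orr  rD, rD, #0x00005600
       orr  rD, rD, #0x00000078

   Four insns - which is why such constants are pushed to the literal pool
   instead, while that is still possible.  */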
/* Return the number of ARM instructions required to synthesize the given
   constant.  */
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
  HOST_WIDE_INT temp1;

      if (remainder & (3 << (i - 2)))

      temp1 = remainder & ((0x0ff << end)
                           | ((i < end) ? (0xff >> (32 - end)) : 0));
      remainder &= ~temp1;
    } while (remainder);
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */
emit_constant_insn (rtx cond, rtx pattern)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */
/* ??? This needs more work for thumb2.  */
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
  int can_negate_initial = 0;
  int num_bits_set = 0;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;

  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are in progress.  */
      can_negate_initial = 1;

      if (remainder == 0xffffffff)
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, target,
                                           GEN_INT (ARM_SIGN_EXTEND (val))));

      if (reload_completed && rtx_equal_p (target, source))
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, target, source));
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, target, const0_rtx));

      if (remainder == 0xffffffff)
          if (reload_completed && rtx_equal_p (target, source))
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, target, source));

      if (reload_completed && rtx_equal_p (target, source))
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, target, source));

      /* We don't know how to handle other cases yet.  */
      gcc_assert (remainder == 0xffffffff);

      emit_constant_insn (cond,
                          gen_rtx_SET (VOIDmode, target,
                                       gen_rtx_NOT (mode, source)));

      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, target,
                                           gen_rtx_NEG (mode, source)));

      if (const_ok_for_arm (val))
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, target,
                                           gen_rtx_MINUS (mode, GEN_INT (val),
  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_arm (val)
      || (can_negate_initial && const_ok_for_arm (-val))
      || (can_invert && const_ok_for_arm (~val)))
      emit_constant_insn (cond,
                          gen_rtx_SET (VOIDmode, target,
                                       ? gen_rtx_fmt_ee (code, mode, source,

  /* Calculate a few attributes that may be useful for specific
     machine implementations.  */
  for (i = 31; i >= 0; i--)
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;

  for (i = 31; i >= 0; i--)
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;

  for (i = 0; i <= 31; i++)
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;

  for (i = 0; i <= 31; i++)
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
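
  /* Editorial example: for remainder == 0x0003fff0 the counts above are
     clear_sign_bit_copies == 14 (bits 31..18 are zero),
     set_sign_bit_copies == 0, clear_zero_bit_copies == 4 (bits 3..0 are
     zero) and set_zero_bit_copies == 0; the strategies below key off
     these shapes.  */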
  /* See if we can use movw.  */
  if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
      emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,

  /* See if we can do this by sign_extending a constant that is known
     to be negative.  This is a good way of doing it, since the shift
     may well merge into a subsequent insn.  */
  if (set_sign_bit_copies > 1)
      if (const_ok_for_arm
          (temp1 = ARM_SIGN_EXTEND (remainder
                                    << (set_sign_bit_copies - 1))))
          rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, new_src,
          emit_constant_insn (cond,
                              gen_ashrsi3 (target, new_src,
                                           GEN_INT (set_sign_bit_copies - 1)));

      /* For an inverted constant, we will need to set the low bits,
         these will be shifted out of harm's way.  */
      temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
      if (const_ok_for_arm (~temp1))
          rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, new_src,
          emit_constant_insn (cond,
                              gen_ashrsi3 (target, new_src,
                                           GEN_INT (set_sign_bit_copies - 1)));
  /* See if we can calculate the value as the difference between two
     valid immediates.  */
  if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
      int topshift = clear_sign_bit_copies & ~1;

      temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                               & (0xff000000 >> topshift));

      /* If temp1 is zero, then that means the 9 most significant
         bits of remainder were 1 and we've caused it to overflow.
         When topshift is 0 we don't need to do anything since we
         can borrow from 'bit 32'.  */
      if (temp1 == 0 && topshift != 0)
        temp1 = 0x80000000 >> (topshift - 1);

      temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

      if (const_ok_for_arm (temp2))
          rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, new_src,
          emit_constant_insn (cond,
                              gen_addsi3 (target, new_src,
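
  /* Editorial example: remainder == 0x00ffff00 is not itself a valid
     immediate, but temp1 == 0x01000000 and temp2 == 0x00000100 both are,
     so the value can be built as

         mov  rD, #0x01000000
         sub  rD, rD, #0x00000100

     (register name rD is hypothetical).  */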
  /* See if we can generate this by setting the bottom (or the top)
     16 bits, and then shifting these into the other half of the
     word.  We only look for the simplest cases, to do more would cost
     too much.  Be careful, however, not to generate this when the
     alternative would take fewer insns.  */
  if (val & 0xffff0000)
      temp1 = remainder & 0xffff0000;
      temp2 = remainder & 0x0000ffff;

      /* Overlaps outside this range are best done using other methods.  */
      for (i = 9; i < 24; i++)
          if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
              && !const_ok_for_arm (temp2))
              rtx new_src = (subtargets
                             ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
              insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                        source, subtargets, generate);

                       gen_rtx_ASHIFT (mode, source,

      /* Don't duplicate cases already considered.  */
      for (i = 17; i < 24; i++)
          if (((temp1 | (temp1 >> i)) == remainder)
              && !const_ok_for_arm (temp1))
              rtx new_src = (subtargets
                             ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
              insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                        source, subtargets, generate);

                       gen_rtx_SET (VOIDmode, target,
                       gen_rtx_LSHIFTRT (mode, source,
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
          /* TARGET can't be NULL if SUBTARGETS is 0  */
          || (reload_completed && !reg_mentioned_p (target, source)))
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;

              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_fmt_ee (code, mode,

      if (set_sign_bit_copies > 8
          && (val & (-1 << (32 - set_sign_bit_copies))) == val)
          rtx sub = subtargets ? gen_reg_rtx (mode) : target;
          rtx shift = GEN_INT (set_sign_bit_copies);

                       gen_rtx_SET (VOIDmode, sub,
                                    gen_rtx_ASHIFT (mode,
                       gen_rtx_SET (VOIDmode, target,
                                    gen_rtx_LSHIFTRT (mode, sub,

      if (set_zero_bit_copies > 8
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
          rtx sub = subtargets ? gen_reg_rtx (mode) : target;
          rtx shift = GEN_INT (set_zero_bit_copies);

                       gen_rtx_SET (VOIDmode, sub,
                                    gen_rtx_LSHIFTRT (mode,
                       gen_rtx_SET (VOIDmode, target,
                                    gen_rtx_ASHIFT (mode, sub,

      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
          rtx sub = subtargets ? gen_reg_rtx (mode) : target;
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, sub,
                                           gen_rtx_NOT (mode, source)));

            sub = gen_reg_rtx (mode);
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, sub,
                                           gen_rtx_AND (mode, source,
          emit_constant_insn (cond,
                              gen_rtx_SET (VOIDmode, target,
                                           gen_rtx_NOT (mode, sub)));
      /* See if two shifts will do two or more insns' worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
          HOST_WIDE_INT shift_mask = ((0xffffffff
                                       << (32 - clear_sign_bit_copies))

          if ((remainder | shift_mask) != 0xffffffff)
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);

                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);

              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);

                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);

              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
  for (i = 0; i < 32; i++)
    if (remainder & (1 << i))

  if (code == AND || (can_invert && num_bits_set > 16))
    remainder = (~remainder) & 0xffffffff;
  else if (code == PLUS && num_bits_set > 16)
    remainder = (-remainder) & 0xffffffff;

  /* Now try and find a way of doing the job in either two or three
     instructions.

     We start by looking for the largest block of zeros that are aligned on
     a 2-bit boundary, we then fill up the temps, wrapping around to the
     top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.
     Thumb-2 constants are shifted, not rotated, so the MSB is always the
     best place to start.  */

  /* ??? Use thumb2 replicated constants when the high and low halfwords
     are the same.  */
    int best_consecutive_zeros = 0;

    for (i = 0; i < 32; i += 2)
        int consecutive_zeros = 0;

        if (!(remainder & (3 << i)))
            while ((i < 32) && !(remainder & (3 << i)))
                consecutive_zeros += 2;
            if (consecutive_zeros > best_consecutive_zeros)
                best_consecutive_zeros = consecutive_zeros;
                best_start = i - consecutive_zeros;

    /* So long as it won't require any more insns to do so, it's
       desirable to emit a small constant (in bits 0...9) in the last
       insn.  This way there is more chance that it can be combined with
       a later addressing insn to form a pre-indexed load or store
       operation.  Consider:

               *((volatile int *)0xe0000100) = 1;
               *((volatile int *)0xe0000110) = 2;

       We want this to wind up as:

                str rB, [rA, #0x100]

                str rB, [rA, #0x110]

       rather than having to synthesize both large constants from scratch.

       Therefore, we calculate how many insns would be required to emit
       the constant starting from `best_start', and also starting from
       zero (i.e. with bit 31 first to be output).  If `best_start'
       doesn't yield a shorter sequence, we may as well use zero.  */
        && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
        && (count_insns_for_constant (remainder, 0) <=
            count_insns_for_constant (remainder, best_start)))
  /* Now start emitting the insns.  */
      if (remainder & (3 << (i - 2)))

          temp1 = remainder & ((0x0ff << end)
                               | ((i < end) ? (0xff >> (32 - end)) : 0));
          remainder &= ~temp1;

              rtx new_src, temp1_rtx;

              if (code == SET || code == MINUS)
                  new_src = (subtargets ? gen_reg_rtx (mode) : target);
                  if (can_invert && code != MINUS)

                  if (remainder && subtargets)
                    new_src = gen_reg_rtx (mode);

                  else if (can_negate)

              temp1 = trunc_int_for_mode (temp1, mode);
              temp1_rtx = GEN_INT (temp1);

              else if (code == MINUS)
                temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
                temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, new_src,

      else if (code == MINUS)

  /* ARM allows rotates by a multiple of two.  Thumb-2 allows arbitrary
     shifts.  */
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */
arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
  unsigned HOST_WIDE_INT i = INTVAL (*op1);
  unsigned HOST_WIDE_INT maxval;
  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE (mode) - 1)) - 1;

      && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
      *op1 = GEN_INT (i + 1);
      return code == GT ? GE : LT;

      && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
      *op1 = GEN_INT (i - 1);
      return code == GE ? GT : LE;

      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
          *op1 = GEN_INT (i + 1);
          return code == GTU ? GEU : LTU;

      && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
      *op1 = GEN_INT (i - 1);
      return code == GEU ? GTU : LEU;
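
/* Editorial example: "x > 0x3fb" needs the constant 0x3fb, which is not a
   valid rotated immediate, so it is rewritten as "x >= 0x3fc" (GT becomes
   GE with i + 1); 0x3fc is 0xff << 2 and loads in a single insn.  */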
/* Define how to find the value returned by a function.  */
arm_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);
  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    PROMOTE_FUNCTION_MODE (mode, unsignedp, type);

  /* Promote small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);

  return LIBCALL_VALUE (mode);
/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
arm_apply_result_size (void)
      if (TARGET_HARD_FLOAT_ABI)
          if (TARGET_MAVERICK)
      if (TARGET_IWMMXT_ABI)
/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This is called as the target hook
   TARGET_RETURN_IN_MEMORY.  */
arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
  size = int_size_in_bytes (type);

  /* Vector values should be returned using ARM registers, not memory (unless
     they're over 16 bytes, which will break since we only have four
     call-clobbered registers to play with).  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type)
      && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
    /* All simple types are returned in registers.
       For AAPCS, complex types are treated the same as aggregates.  */

  if (arm_abi != ARM_ABI_APCS)
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
  if (TREE_CODE (type) == RECORD_TYPE)
      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = TREE_CHAIN (field))

        return 0; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed  */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = TREE_CHAIN (field);
           field = TREE_CHAIN (field))
          if (TREE_CODE (field) != FIELD_DECL)

          if (!DECL_BIT_FIELD_TYPE (field))

  if (TREE_CODE (type) == UNION_TYPE)
      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field = TREE_CHAIN (field))
          if (TREE_CODE (field) != FIELD_DECL)

          if (FLOAT_TYPE_P (TREE_TYPE (field)))

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))

#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
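
  /* Editorial example (APCS, illustrative): "struct { int x; }" is
     integer-like and word-sized, so it comes back in r0, while
     "struct { int x, y; }" exceeds UNITS_PER_WORD and is returned via a
     hidden pointer to caller-allocated memory.  */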
/* Indicate whether or not words of a double are in big-endian order.  */
arm_float_words_big_endian (void)
  if (TARGET_MAVERICK)

  /* For FPA, float words are always big-endian.  For VFP, float words
     follow the memory system mode.  */

  return (TARGET_BIG_END ? 1 : 0);
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname ATTRIBUTE_UNUSED,
                          tree fndecl ATTRIBUTE_UNUSED)
  /* On the ARM, the offset starts at 0.  */
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'
     arguments.  */
  pcum->named_count = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
/* Return true if mode/type need doubleword alignment.  */
arm_needs_doubleword_align (enum machine_mode mode, tree type)
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
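
/* Editorial example: with the usual 32-bit PARM_BOUNDARY, DImode
   ("long long") and DFmode ("double") arguments have 64-bit alignment,
   so this returns true and arm_function_arg below starts them in an
   even register pair (r0/r1 or r2/r3).  */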
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */
arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                  tree type, int named)
  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named'
     arguments.  */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);

        pcum->can_split = false;

  /* Put doubleword aligned quantities in even register pairs.  */
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))

  if (mode == VOIDmode)
    /* Pick an arbitrary value for operand 2 of the call insn.  */

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)

  return gen_rtx_REG (mode, pcum->nregs);
arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                       tree type, bool named ATTRIBUTE_UNUSED)
  int nregs = pcum->nregs;

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */
arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
                       enum machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
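
/* Editorial example: under the GNU extension above, an argument whose type
   is variably sized (its TYPE_SIZE is not an INTEGER_CST, as with a
   VLA-derived type) is passed as a pointer to a caller-made copy rather
   than by value in registers or on the stack.  */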
/* Encode the current state of the #pragma [no_]long_calls.  */
  OFF,   /* No #pragma [no_]long_calls is in effect.  */
  LONG,  /* #pragma long_calls is in effect.  */
  SHORT  /* #pragma no_long_calls is in effect.  */

static arm_pragma_enum arm_pragma_long_calls = OFF;

arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
  arm_pragma_long_calls = LONG;

arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
  arm_pragma_long_calls = SHORT;

arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
  arm_pragma_long_calls = OFF;
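
/* Editorial usage example for the handlers above (the declaration is
   illustrative):

       #pragma long_calls
       extern void far_func (void);    -- gets the long_call attribute
       #pragma long_calls_off

   #pragma no_long_calls similarly marks following declarations
   short_call.  */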
/* Table of machine attributes.  */
const struct attribute_spec arm_attribute_table[] =
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler }  */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },

  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
  { NULL,           0, 0, false, false, false, NULL }
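
/* Editorial usage example (illustrative declarations):

       void far_helper (void) __attribute__ ((long_call));
       void handler (void) __attribute__ ((interrupt ("IRQ")));
       void stub (void) __attribute__ ((naked));

   "long_call" forces an indirect call sequence, "interrupt" selects the
   ISR prologue/epilogue handled by arm_handle_isr_attribute below, and
   "naked" suppresses prologue/epilogue generation entirely.  */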
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
  if (TREE_CODE (*node) != FUNCTION_DECL)
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
  if (TREE_CODE (*node) != FUNCTION_DECL)
      warning (OPT_Wattributes, "%qs attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    /* FIXME: the argument if any is checked for type attributes;
       should it be checked for decl ones?  */

  if (TREE_CODE (*node) == FUNCTION_TYPE
      || TREE_CODE (*node) == METHOD_TYPE)
      if (arm_isr_value (args) == ARM_FT_UNKNOWN)
          warning (OPT_Wattributes, "%qs attribute ignored",
                   IDENTIFIER_POINTER (name));
          *no_add_attrs = true;
  else if (TREE_CODE (*node) == POINTER_TYPE
           && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
               || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
           && arm_isr_value (args) != ARM_FT_UNKNOWN)
      *node = build_variant_type_copy (*node);
      TREE_TYPE (*node) = build_type_attribute_variant
        tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
      *no_add_attrs = true;

      /* Possibly pass this attribute on from the type to a decl.  */
      if (flags & ((int) ATTR_FLAG_DECL_NEXT
                   | (int) ATTR_FLAG_FUNCTION_NEXT
                   | (int) ATTR_FLAG_ARRAY_NEXT))
          *no_add_attrs = true;
          return tree_cons (name, args, NULL_TREE);

        warning (OPT_Wattributes, "%qs attribute ignored",
                 IDENTIFIER_POINTER (name));
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
  tree decl = TYPE_NAME (*node);

      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
arm_comp_type_attributes (const_tree type1, const_tree type2)
  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
/* Assign default attributes to a newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
arm_set_default_type_attributes (tree type)
  /* Add __attribute__ ((long_call)) to all functions inside
     #pragma long_calls, or __attribute__ ((short_call)) to all
     functions inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
/* Return true if DECL is known to be linked into section SECTION.  */
arm_function_in_section_p (tree decl, section *section)
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))

  return function_section (decl) == section;
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */
arm_is_long_call_p (tree decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && arm_function_in_section_p (decl, current_function_section ()))

  if (lookup_attribute ("long_call", attrs))

  return TARGET_LONG_CALLS;
/* Return nonzero if it is OK to make a tail-call to DECL.  */
arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)

  /* Never tailcall something for which we have no decl, or if we
     are in Thumb mode.  */
  if (decl == NULL || TARGET_THUMB)

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))

  /* Cannot tail-call to long calls, since these are out of range of
     a branch instruction.  */
  if (arm_is_long_call_p (decl))

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))

  /* Everything else is OK.  */
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
legitimate_pic_operand_p (rtx x)
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */
require_pic_register (void)
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM)
          cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE)
            crtl->uses_pic_offset_table = 1;

          cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE)
              crtl->uses_pic_offset_table = 1;

              arm_load_pic_register (0UL);

              emit_insn_after (seq, entry_of_function ());
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
      rtx pic_ref, address;

      /* If this function doesn't have a pic register, create one now.  */
      require_pic_register ();

          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);

        address = gen_reg_rtx (Pmode);

        emit_insn (gen_pic_load_addr_arm (address, orig));
      else if (TARGET_THUMB2)
        emit_insn (gen_pic_load_addr_thumb2 (address, orig));
      else /* TARGET_THUMB1 */
        emit_insn (gen_pic_load_addr_thumb1 (address, orig));

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF
               && SYMBOL_REF_LOCAL_P (orig)))
          && !TARGET_VXWORKS_RTP)
        pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);

          pic_ref = gen_const_mem (Pmode,
                                   gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,

      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

  else if (GET_CODE (orig) == CONST)
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
          gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);

          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (GET_CODE (offset) == CONST_INT)
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);

      if (GET_CODE (offset) == CONST_INT)
        return plus_constant (base, INTVAL (offset));

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
          emit_insn (gen_addsi3 (reg, base, offset));

      return gen_rtx_PLUS (Pmode, base, offset);
/* Find a spare register to use during the prolog of a function.  */
thumb_find_work_register (unsigned long pushed_regs_mask)
  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
    if (!df_regs_ever_live_p (reg))

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers' worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4
     registers' worth.  In this case r3 might be used in the body of the
     function, but it is not being used to convey an argument into the
     function.  In theory we could just check crtl->args.size to see how
     many bytes are being passed in argument registers, but it seems that
     it is unreliable.  Sometimes it will have the value 0 when in fact
     arguments are being passed.  (See testcase execute/20021111-1.c for
     an example).  So we also check the args_info.nregs field as well.
     The problem with this field is that it makes no allowances for
     arguments that are passed to the function but which are not used.
     Hence we could miss an opportunity when a function has an unused
     argument in r3.  But it is better to be safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && crtl->args.info.nregs < 4)
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))

  /* Thumb-2 can use high regs.  */
  for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
    if (pushed_regs_mask & (1 << reg))

  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));

      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

          emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
          emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
      else if (TARGET_THUMB2)
          /* Thumb-2 only allows very limited access to the PC.  Calculate the
             address in a temporary register.  */
          if (arm_pic_register != INVALID_REGNUM)
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));

              gcc_assert (can_create_pseudo_p ());
              pic_tmp = gen_reg_rtx (Pmode);

          emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
          emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
          emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
      else /* TARGET_THUMB1 */
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));

          emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
          emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
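
/* Editorial sketch of the non-VxWorks ARM-mode sequence emitted above
   (register and label names are illustrative only):

       ldr   rPIC, .LCP        @ .LCP: .word _GLOBAL_OFFSET_TABLE_-(.LPIC+8)
   .LPIC:
       add   rPIC, pc, rPIC    @ pc reads as .LPIC + 8, so rPIC = &GOT

   which is why the constant is biased by 'dot + 8' (or 'dot + 4' on
   Thumb) before being wrapped in the UNSPEC.  */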
  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */

/* Return nonzero if X is valid as an ARM state addressing register.  */
arm_address_register_rtx_p (rtx x, int strict_p)
  if (GET_CODE (x) != REG)

    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
pcrel_constant_p (rtx x)
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
/* Return nonzero if X is a valid ARM state address operand.  */
arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))

  use_ldrd = (TARGET_LDRD
              || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
          && GET_CODE (x) == POST_MODIFY
          && GET_CODE (addend) == REG)

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))

  else if (code == PLUS)
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && arm_legitimate_index_p (mode, xop1, outer, strict_p))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));

  /* Reload currently can't handle MINUS, so disable this for now.  */
  else if (GET_CODE (x) == MINUS)
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && symbol_mentioned_p (get_pool_constant (x))
               && ! pcrel_constant_p (get_pool_constant (x))))
/* Return nonzero if X is a valid Thumb-2 address operand.  */
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))

  use_ldrd = (TARGET_LDRD
              || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (GET_CODE (addend) != CONST_INT)

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))

  else if (code == PLUS)
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && thumb2_legitimate_index_p (mode, xop1, strict_p))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && symbol_mentioned_p (get_pool_constant (x))
               && ! pcrel_constant_p (get_pool_constant (x))))
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (TARGET_FPA || TARGET_MAVERICK)
      && (GET_MODE_CLASS (mode) == MODE_FLOAT
          || (TARGET_MAVERICK && mode == DImode)))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))

  if (mode == DImode || mode == DFmode)
      if (code == CONST_INT)
          HOST_WIDE_INT val = INTVAL (index);

            return val > -256 && val < 256;
          return val > -4096 && val < 4092;

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);

  if (GET_MODE_SIZE (mode) <= 4
          || (mode == QImode && outer == SIGN_EXTEND))))
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && GET_CODE (op) == CONST_INT
                  && INTVAL (op) <= 31);

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))

      range = (mode == HImode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
/* Return true if OP is a valid index scaling factor for a Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
thumb2_index_mul_operand (rtx op)
  if (GET_CODE (op) != CONST_INT)

  return (val == 1 || val == 2 || val == 4 || val == 8);
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (TARGET_FPA || TARGET_MAVERICK)
      && (GET_MODE_CLASS (mode) == MODE_FLOAT
          || (TARGET_MAVERICK && mode == DImode)))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);

      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))

  if (mode == DImode || mode == DFmode)
      HOST_WIDE_INT val = INTVAL (index);
      /* ??? Can we assume ldrd for thumb2?  */
      /* Thumb-2 ldrd only has reg+const addressing modes.  */
      if (code != CONST_INT)

      /* ldrd supports offsets of +-1020.
         However the ldr fallback does not.  */
      return val > -256 && val < 256 && (val & 3) == 0;

      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
  else if (code == ASHIFT)
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && GET_CODE (op) == CONST_INT
              && INTVAL (op) <= 3);

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
  if (GET_CODE (x) != REG)

    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
thumb1_index_register_rtx_p (rtx x, int strict_p)
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
4165 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4167 The AP may be eliminated to either the SP or the FP, so we use the
4168 least common denominator, i.e. SImode, and offsets from 0 to 64.
4170 ??? Verify whether the above is the right approach.
4172 ??? Also, the FP may be eliminated to the SP, so perhaps that
4173 needs special handling also.
4175 ??? Look at how the mips16 port solves this problem. It probably uses
4176 better ways to solve some of these problems.
4178 Although it is not incorrect, we don't accept QImode and HImode
4179 addresses based on the frame pointer or arg pointer until the
4180 reload pass starts. This is so that eliminating such addresses
4181 into stack based ones won't produce impossible code. */
4183 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4185 /* ??? Not clear if this is right. Experiment. */
4186 if (GET_MODE_SIZE (mode) < 4
4187 && !(reload_in_progress || reload_completed)
4188 && (reg_mentioned_p (frame_pointer_rtx, x)
4189 || reg_mentioned_p (arg_pointer_rtx, x)
4190 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4191 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4192 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4193 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4196 /* Accept any base register. SP only in SImode or larger. */
4197 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4200 /* This is PC relative data before arm_reorg runs. */
4201 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4202 && GET_CODE (x) == SYMBOL_REF
4203 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4206 /* This is PC relative data after arm_reorg runs. */
4207 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4208 && (GET_CODE (x) == LABEL_REF
4209 || (GET_CODE (x) == CONST
4210 && GET_CODE (XEXP (x, 0)) == PLUS
4211 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4212 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4215 /* Post-inc indexing only supported for SImode and larger. */
4216 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4217 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4220 else if (GET_CODE (x) == PLUS)
4222 /* REG+REG address can be any two index registers. */
4223 /* We disallow FRAME+REG addressing since we know that FRAME
4224 will be replaced with STACK, and SP relative addressing only
4225 permits SP+OFFSET. */
4226 if (GET_MODE_SIZE (mode) <= 4
4227 && XEXP (x, 0) != frame_pointer_rtx
4228 && XEXP (x, 1) != frame_pointer_rtx
4229 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4230 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4233 /* REG+const has 5-7 bit offset for non-SP registers. */
4234 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4235 || XEXP (x, 0) == arg_pointer_rtx)
4236 && GET_CODE (XEXP (x, 1)) == CONST_INT
4237 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4240 /* REG+const has 10-bit offset for SP, but only SImode and
4241 larger are supported. */
4242 /* ??? Should probably check for DI/DFmode overflow here
4243 just like GO_IF_LEGITIMATE_OFFSET does. */
4244 else if (GET_CODE (XEXP (x, 0)) == REG
4245 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4246 && GET_MODE_SIZE (mode) >= 4
4247 && GET_CODE (XEXP (x, 1)) == CONST_INT
4248 && INTVAL (XEXP (x, 1)) >= 0
4249 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4250 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4253 else if (GET_CODE (XEXP (x, 0)) == REG
4254 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4255 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4256 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4257 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4258 && GET_MODE_SIZE (mode) >= 4
4259 && GET_CODE (XEXP (x, 1)) == CONST_INT
4260 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4264 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4265 && GET_MODE_SIZE (mode) == 4
4266 && GET_CODE (x) == SYMBOL_REF
4267 && CONSTANT_POOL_ADDRESS_P (x)
4269 && symbol_mentioned_p (get_pool_constant (x))
4270 && ! pcrel_constant_p (get_pool_constant (x))))
4276 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4277 instruction of mode MODE. */
4279 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4281 switch (GET_MODE_SIZE (mode))
4284 return val >= 0 && val < 32;
4287 return val >= 0 && val < 64 && (val & 1) == 0;
4291 && (val + GET_MODE_SIZE (mode)) <= 128
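
/* These ranges follow from the Thumb-1 5-bit offset field, which is
   scaled by the access size: 0..31 for bytes, 0..62 for halfwords and
   0..124 for words.  */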
4296 /* Build the SYMBOL_REF for __tls_get_addr. */
4298 static GTY(()) rtx tls_get_addr_libfunc;
4301 get_tls_get_addr (void)
4303 if (!tls_get_addr_libfunc)
4304 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4305 return tls_get_addr_libfunc;
4309 arm_load_tp (rtx target)
4312 target = gen_reg_rtx (SImode);
4316 /* Can return in any reg. */
4317 emit_insn (gen_load_tp_hard (target));
4321 /* Always returned in r0. Immediately copy the result into a pseudo,
4322 otherwise other uses of r0 (e.g. setting up function arguments) may
4323 clobber the value. */
4327 emit_insn (gen_load_tp_soft ());
4329 tmp = gen_rtx_REG (SImode, 0);
4330 emit_move_insn (target, tmp);
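
/* As a rough sketch of the expansions involved: on a target with a
   hardware thread register, load_tp_hard reads TPIDRURO with something
   like "mrc p15, 0, rD, c13, c0, 3"; the soft variant instead calls
   the EABI helper __aeabi_read_tp, which returns the thread pointer
   in r0.  */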
4336 load_tls_operand (rtx x, rtx reg)
4340 if (reg == NULL_RTX)
4341 reg = gen_reg_rtx (SImode);
4343 tmp = gen_rtx_CONST (SImode, x);
4345 emit_move_insn (reg, tmp);
4351 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4353 rtx insns, label, labelno, sum;
4357 labelno = GEN_INT (pic_labelno++);
4358 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4359 label = gen_rtx_CONST (VOIDmode, label);
4361 sum = gen_rtx_UNSPEC (Pmode,
4362 gen_rtvec (4, x, GEN_INT (reloc), label,
4363 GEN_INT (TARGET_ARM ? 8 : 4)),
4365 reg = load_tls_operand (sum, reg);
4368 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4369 else if (TARGET_THUMB2)
4372 /* Thumb-2 only allows very limited access to the PC. Calculate
4373 the address in a temporary register. */
4374 tmp = gen_reg_rtx (SImode);
4375 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4376 emit_insn (gen_addsi3 (reg, reg, tmp));
4378 else /* TARGET_THUMB1 */
4379 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4381 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4382 Pmode, 1, reg, Pmode);
4384 insns = get_insns ();
4391 legitimize_tls_address (rtx x, rtx reg)
4393 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4394 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4398 case TLS_MODEL_GLOBAL_DYNAMIC:
4399 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4400 dest = gen_reg_rtx (Pmode);
4401 emit_libcall_block (insns, dest, ret, x);
4404 case TLS_MODEL_LOCAL_DYNAMIC:
4405 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4407 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4408 share the LDM result with other LD model accesses. */
4409 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4411 dest = gen_reg_rtx (Pmode);
4412 emit_libcall_block (insns, dest, ret, eqv);
4414 /* Load the addend. */
4415 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4417 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4418 return gen_rtx_PLUS (Pmode, dest, addend);
4420 case TLS_MODEL_INITIAL_EXEC:
4421 labelno = GEN_INT (pic_labelno++);
4422 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4423 label = gen_rtx_CONST (VOIDmode, label);
4424 sum = gen_rtx_UNSPEC (Pmode,
4425 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4426 GEN_INT (TARGET_ARM ? 8 : 4)),
4428 reg = load_tls_operand (sum, reg);
4431 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4432 else if (TARGET_THUMB2)
4435 /* Thumb-2 only allows very limited access to the PC. Calculate
4436 the address in a temporary register. */
4437 tmp = gen_reg_rtx (SImode);
4438 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4439 emit_insn (gen_addsi3 (reg, reg, tmp));
4440 emit_move_insn (reg, gen_const_mem (SImode, reg));
4444 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4445 emit_move_insn (reg, gen_const_mem (SImode, reg));
4448 tp = arm_load_tp (NULL_RTX);
4450 return gen_rtx_PLUS (Pmode, tp, reg);
4452 case TLS_MODEL_LOCAL_EXEC:
4453 tp = arm_load_tp (NULL_RTX);
4455 reg = gen_rtx_UNSPEC (Pmode,
4456 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4458 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4460 return gen_rtx_PLUS (Pmode, tp, reg);
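
/* To summarize the sequences built above: the global- and
   local-dynamic models call __tls_get_addr; initial-exec loads the
   tp-relative offset of the symbol from the GOT and adds it to the
   thread pointer; local-exec adds a link-time constant offset
   directly to the thread pointer.  */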
4467 /* Try machine-dependent ways of modifying an illegitimate address
4468 to be legitimate. If we find one, return the new, valid address. */
4470 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4472 if (arm_tls_symbol_p (x))
4473 return legitimize_tls_address (x, NULL_RTX);
4475 if (GET_CODE (x) == PLUS)
4477 rtx xop0 = XEXP (x, 0);
4478 rtx xop1 = XEXP (x, 1);
4480 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4481 xop0 = force_reg (SImode, xop0);
4483 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4484 xop1 = force_reg (SImode, xop1);
4486 if (ARM_BASE_REGISTER_RTX_P (xop0)
4487 && GET_CODE (xop1) == CONST_INT)
4489 HOST_WIDE_INT n, low_n;
4493 /* VFP addressing modes actually allow greater offsets, but for
4494 now we just stick with the lowest common denominator. */
4496 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4508 low_n = ((mode) == TImode ? 0
4509 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4513 base_reg = gen_reg_rtx (SImode);
4514 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4515 emit_move_insn (base_reg, val);
4516 x = plus_constant (base_reg, low_n);
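
/* Worked example for the non-coprocessor path: for (plus (reg r1)
   (const_int 0x12345)) in SImode we get low_n = 0x345 and n = 0x12000,
   so BASE_REG is loaded with r1 + 0x12000 and the returned address is
   (plus base_reg 0x345), whose offset fits a 12-bit load/store
   immediate.  */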
4518 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4519 x = gen_rtx_PLUS (SImode, xop0, xop1);
4522 /* XXX We don't allow MINUS any more -- see comment in
4523 arm_legitimate_address_p (). */
4524 else if (GET_CODE (x) == MINUS)
4526 rtx xop0 = XEXP (x, 0);
4527 rtx xop1 = XEXP (x, 1);
4529 if (CONSTANT_P (xop0))
4530 xop0 = force_reg (SImode, xop0);
4532 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4533 xop1 = force_reg (SImode, xop1);
4535 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4536 x = gen_rtx_MINUS (SImode, xop0, xop1);
4539 /* Make sure to take full advantage of the pre-indexed addressing mode
4540 with absolute addresses which often allows for the base register to
4541 be factorized for multiple adjacent memory references, and it might
4542 even allow for the minipool to be avoided entirely. */
4543 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4546 HOST_WIDE_INT mask, base, index;
4549 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4550 use an 8-bit index. So let's use a 12-bit index for SImode only and
4551 hope that arm_gen_constant will enable ldrb to use more bits. */
4552 bits = (mode == SImode) ? 12 : 8;
4553 mask = (1 << bits) - 1;
4554 base = INTVAL (x) & ~mask;
4555 index = INTVAL (x) & mask;
4556 if (bit_count (base & 0xffffffff) > (32 - bits) / 2)
4558 /* It'll most probably be more efficient to generate the base
4559 with more bits set and use a negative index instead. */
4563 base_reg = force_reg (SImode, GEN_INT (base));
4564 x = plus_constant (base_reg, index);
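
/* Worked example: for x = (const_int 0x12345) in SImode, bits = 12 and
   mask = 0xfff, giving base = 0x12000 and index = 0x345.  BASE has only
   two bits set, so the positive split is kept: BASE_REG is loaded with
   0x12000 and the address becomes (plus base_reg 0x345).  */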
4569 /* We need to find and carefully transform any SYMBOL and LABEL
4570 references, so go back to the original address expression. */
4571 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4573 if (new_x != orig_x)
4581 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4582 to be legitimate. If we find one, return the new, valid address. */
4584 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4586 if (arm_tls_symbol_p (x))
4587 return legitimize_tls_address (x, NULL_RTX);
4589 if (GET_CODE (x) == PLUS
4590 && GET_CODE (XEXP (x, 1)) == CONST_INT
4591 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4592 || INTVAL (XEXP (x, 1)) < 0))
4594 rtx xop0 = XEXP (x, 0);
4595 rtx xop1 = XEXP (x, 1);
4596 HOST_WIDE_INT offset = INTVAL (xop1);
4598 /* Try to fold the offset into a biasing of the base register and
4599 then offsetting that. Don't do this when optimizing for space
4600 since it can cause too many CSEs. */
4601 if (optimize_size && offset >= 0
4602 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4604 HOST_WIDE_INT delta;
4607 delta = offset - (256 - GET_MODE_SIZE (mode));
4608 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4609 delta = 31 * GET_MODE_SIZE (mode);
4611 delta = offset & (~31 * GET_MODE_SIZE (mode));
4613 xop0 = force_operand (plus_constant (xop0, offset - delta),
4615 x = plus_constant (xop0, delta);
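
/* Worked example: rebasing (plus (reg) (const_int 130)) in SImode
   picks delta = 31 * 4 = 124, so the base is advanced by 6 and the
   final address is (plus (reg) 124), a legal Thumb-1 word offset.  */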
4617 else if (offset < 0 && offset > -256)
4618 /* Small negative offsets are best done with a subtract before the
4619 dereference; forcing these into a register normally takes two
4620 instructions. */
4621 x = force_operand (x, NULL_RTX);
4624 /* For the remaining cases, force the constant into a register. */
4625 xop1 = force_reg (SImode, xop1);
4626 x = gen_rtx_PLUS (SImode, xop0, xop1);
4629 else if (GET_CODE (x) == PLUS
4630 && s_register_operand (XEXP (x, 1), SImode)
4631 && !s_register_operand (XEXP (x, 0), SImode))
4633 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4635 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4640 /* We need to find and carefully transform any SYMBOL and LABEL
4641 references, so go back to the original address expression. */
4642 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4644 if (new_x != orig_x)
4652 thumb_legitimize_reload_address (rtx *x_p,
4653 enum machine_mode mode,
4654 int opnum, int type,
4655 int ind_levels ATTRIBUTE_UNUSED)
4659 if (GET_CODE (x) == PLUS
4660 && GET_MODE_SIZE (mode) < 4
4661 && REG_P (XEXP (x, 0))
4662 && XEXP (x, 0) == stack_pointer_rtx
4663 && GET_CODE (XEXP (x, 1)) == CONST_INT
4664 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4669 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4670 Pmode, VOIDmode, 0, 0, opnum, type);
4674 /* If both registers are hi-regs, then it's better to reload the
4675 entire expression rather than each register individually. That
4676 only requires one reload register rather than two. */
4677 if (GET_CODE (x) == PLUS
4678 && REG_P (XEXP (x, 0))
4679 && REG_P (XEXP (x, 1))
4680 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4681 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4686 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4687 Pmode, VOIDmode, 0, 0, opnum, type);
4694 /* Test for various thread-local symbols. */
4696 /* Return TRUE if X is a thread-local symbol. */
4699 arm_tls_symbol_p (rtx x)
4701 if (! TARGET_HAVE_TLS)
4704 if (GET_CODE (x) != SYMBOL_REF)
4707 return SYMBOL_REF_TLS_MODEL (x) != 0;
4710 /* Helper for arm_tls_referenced_p. */
4713 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4715 if (GET_CODE (*x) == SYMBOL_REF)
4716 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4718 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4719 TLS offsets, not real symbol references. */
4720 if (GET_CODE (*x) == UNSPEC
4721 && XINT (*x, 1) == UNSPEC_TLS)
4727 /* Return TRUE if X contains any TLS symbol references. */
4730 arm_tls_referenced_p (rtx x)
4732 if (! TARGET_HAVE_TLS)
4735 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4738 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4741 arm_cannot_force_const_mem (rtx x)
4745 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4747 split_const (x, &base, &offset);
4748 if (GET_CODE (base) == SYMBOL_REF
4749 && !offset_within_block_p (base, INTVAL (offset)))
4752 return arm_tls_referenced_p (x);
4755 #define REG_OR_SUBREG_REG(X) \
4756 (GET_CODE (X) == REG \
4757 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4759 #define REG_OR_SUBREG_RTX(X) \
4760 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4762 #ifndef COSTS_N_INSNS
4763 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4766 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4768 enum machine_mode mode = GET_MODE (x);
4781 return COSTS_N_INSNS (1);
4784 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4787 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4794 return COSTS_N_INSNS (2) + cycles;
4796 return COSTS_N_INSNS (1) + 16;
4799 return (COSTS_N_INSNS (1)
4800 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4801 + (GET_CODE (SET_DEST (x)) == MEM)));
4806 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4808 if (thumb_shiftable_const (INTVAL (x)))
4809 return COSTS_N_INSNS (2);
4810 return COSTS_N_INSNS (3);
4812 else if ((outer == PLUS || outer == COMPARE)
4813 && INTVAL (x) < 256 && INTVAL (x) > -256)
4815 else if (outer == AND
4816 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4817 return COSTS_N_INSNS (1);
4818 else if (outer == ASHIFT || outer == ASHIFTRT
4819 || outer == LSHIFTRT)
4821 return COSTS_N_INSNS (2);
4827 return COSTS_N_INSNS (3);
4845 /* XXX another guess. */
4846 /* Memory costs quite a lot for the first word, but subsequent words
4847 load at the equivalent of a single insn each. */
4848 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4849 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4854 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4859 /* XXX still guessing. */
4860 switch (GET_MODE (XEXP (x, 0)))
4863 return (1 + (mode == DImode ? 4 : 0)
4864 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4867 return (4 + (mode == DImode ? 4 : 0)
4868 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4871 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4883 /* Worker routine for arm_rtx_costs. */
4884 /* ??? This needs updating for thumb2. */
4886 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4888 enum machine_mode mode = GET_MODE (x);
4889 enum rtx_code subcode;
4895 /* Memory costs quite a lot for the first word, but subsequent words
4896 load at the equivalent of a single insn each. */
4897 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4898 + (GET_CODE (x) == SYMBOL_REF
4899 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4905 return optimize_size ? COSTS_N_INSNS (2) : 100;
4908 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4915 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4917 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4918 + ((GET_CODE (XEXP (x, 0)) == REG
4919 || (GET_CODE (XEXP (x, 0)) == SUBREG
4920 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4924 /* Increase the cost of complex shifts because they aren't any faster,
4925 and they reduce dual-issue opportunities. */
4926 if (arm_tune_cortex_a9
4927 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
4930 return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG
4931 || (GET_CODE (XEXP (x, 0)) == SUBREG
4932 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4934 + ((GET_CODE (XEXP (x, 1)) == REG
4935 || (GET_CODE (XEXP (x, 1)) == SUBREG
4936 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4937 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4941 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4943 extra_cost = rtx_cost (XEXP (x, 1), code, true);
4944 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4945 extra_cost += 4 * ARM_NUM_REGS (mode);
4950 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4951 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4952 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4953 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4956 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4957 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4958 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4959 && arm_const_double_rtx (XEXP (x, 1))))
4961 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4962 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4963 && arm_const_double_rtx (XEXP (x, 0))))
4966 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4967 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4968 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4969 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4970 || subcode == ASHIFTRT || subcode == LSHIFTRT
4971 || subcode == ROTATE || subcode == ROTATERT
4973 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4974 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4975 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4976 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4977 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4978 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4979 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4984 if (arm_arch6 && mode == SImode
4985 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4986 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4987 return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
4988 + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
4990 if (GET_CODE (XEXP (x, 0)) == MULT)
4992 extra_cost = rtx_cost (XEXP (x, 0), code, true);
4993 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4994 extra_cost += 4 * ARM_NUM_REGS (mode);
4998 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4999 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
5000 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
5001 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5002 && arm_const_double_rtx (XEXP (x, 1))))
5006 case AND: case XOR: case IOR:
5009 /* Normally the frame registers will be spilt into reg+const during
5010 reload, so it is a bad idea to combine them with other instructions,
5011 since then they might not be moved outside of loops. As a compromise
5012 we allow integration with ops that have a constant as their second
5013 operand. */
5014 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
5015 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
5016 && GET_CODE (XEXP (x, 1)) != CONST_INT)
5017 || (REG_OR_SUBREG_REG (XEXP (x, 0))
5018 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
5022 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
5023 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
5024 || (GET_CODE (XEXP (x, 1)) == CONST_INT
5025 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
5028 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
5029 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
5030 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
5031 || (GET_CODE (XEXP (x, 1)) == CONST_INT
5032 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
5035 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
5036 return (1 + extra_cost
5037 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
5038 || subcode == LSHIFTRT || subcode == ASHIFTRT
5039 || subcode == ROTATE || subcode == ROTATERT
5041 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5042 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
5043 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
5044 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
5045 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1))
5046 && !arm_tune_cortex_a9)
5047 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
5053 /* This should have been handled by the CPU specific routines. */
5057 if (arm_arch3m && mode == SImode
5058 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5059 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5060 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5061 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5062 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5063 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5068 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5069 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
5073 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5075 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5078 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5086 return 4 + (mode == DImode ? 4 : 0);
5089 if (arm_arch_thumb2 && mode == SImode)
5090 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5092 if (GET_MODE (XEXP (x, 0)) == QImode)
5093 return (4 + (mode == DImode ? 4 : 0)
5094 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5097 if (arm_arch6 && mode == SImode)
5098 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5100 switch (GET_MODE (XEXP (x, 0)))
5103 return (1 + (mode == DImode ? 4 : 0)
5104 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5107 return (4 + (mode == DImode ? 4 : 0)
5108 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5111 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5126 if (const_ok_for_arm (INTVAL (x)))
5127 return outer == SET ? 2 : -1;
5128 else if (outer == AND
5129 && const_ok_for_arm (~INTVAL (x)))
5131 else if ((outer == COMPARE
5132 || outer == PLUS || outer == MINUS)
5133 && const_ok_for_arm (-INTVAL (x)))
5145 return (outer == SET) ? 1 : -1;
5148 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5149 return outer == SET ? 2 : -1;
5150 else if ((outer == COMPARE || outer == PLUS)
5151 && neg_const_double_rtx_ok_for_fpa (x))
5160 /* RTX costs when optimizing for size. */
5162 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5164 enum machine_mode mode = GET_MODE (x);
5168 /* XXX TBD. For now, use the standard costs. */
5169 *total = thumb1_rtx_costs (x, code, outer_code);
5173 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5177 /* A memory access costs 1 insn if the mode is small or the address is
5178 a single register; otherwise it costs one insn per word. */
5179 if (REG_P (XEXP (x, 0)))
5180 *total = COSTS_N_INSNS (1);
5182 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5189 /* Needs a libcall, so it costs about this. */
5190 *total = COSTS_N_INSNS (2);
5194 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5196 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
5204 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5206 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
5209 else if (mode == SImode)
5211 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
5212 /* Slightly disparage register shifts, but not by much. */
5213 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5214 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
5218 /* Needs a libcall. */
5219 *total = COSTS_N_INSNS (2);
5223 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5225 *total = COSTS_N_INSNS (1);
5231 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5232 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5234 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5235 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5236 || subcode1 == ROTATE || subcode1 == ROTATERT
5237 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5238 || subcode1 == ASHIFTRT)
5240 /* It's just the cost of the two operands. */
5245 *total = COSTS_N_INSNS (1);
5249 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5253 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5255 *total = COSTS_N_INSNS (1);
5260 case AND: case XOR: case IOR:
5263 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5265 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5266 || subcode == LSHIFTRT || subcode == ASHIFTRT
5267 || (code == AND && subcode == NOT))
5269 /* It's just the cost of the two operands. */
5275 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5279 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5283 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5284 *total = COSTS_N_INSNS (1);
5287 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5296 if (cc_register (XEXP (x, 0), VOIDmode))
5299 *total = COSTS_N_INSNS (1);
5303 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5304 *total = COSTS_N_INSNS (1);
5306 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5311 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5313 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5314 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5317 *total += COSTS_N_INSNS (1);
5322 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5324 switch (GET_MODE (XEXP (x, 0)))
5327 *total += COSTS_N_INSNS (1);
5331 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5337 *total += COSTS_N_INSNS (2);
5342 *total += COSTS_N_INSNS (1);
5347 if (const_ok_for_arm (INTVAL (x)))
5348 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5349 else if (const_ok_for_arm (~INTVAL (x)))
5350 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5351 else if (const_ok_for_arm (-INTVAL (x)))
5353 if (outer_code == COMPARE || outer_code == PLUS
5354 || outer_code == MINUS)
5357 *total = COSTS_N_INSNS (1);
5360 *total = COSTS_N_INSNS (2);
5366 *total = COSTS_N_INSNS (2);
5370 *total = COSTS_N_INSNS (4);
5375 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
5376 cost of these slightly. */
5377 *total = COSTS_N_INSNS (1) + 1;
5381 if (mode != VOIDmode)
5382 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5384 *total = COSTS_N_INSNS (4); /* Who knows? */
5389 /* RTX cost dispatcher: use the size costs when not optimizing for speed, otherwise the per-core tuned cost function. */
5391 arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
5394 return arm_size_rtx_costs (x, code, outer_code, total);
5396 return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total);
5399 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5400 supported on any "slowmul" cores, so it can be ignored. */
5403 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5405 enum machine_mode mode = GET_MODE (x);
5409 *total = thumb1_rtx_costs (x, code, outer_code);
5416 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5423 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5425 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5426 & (unsigned HOST_WIDE_INT) 0xffffffff);
5427 int cost, const_ok = const_ok_for_arm (i);
5428 int j, booth_unit_size;
5430 /* Tune as appropriate. */
5431 cost = const_ok ? 4 : 8;
5432 booth_unit_size = 2;
5433 for (j = 0; i && j < 32; j += booth_unit_size)
5435 i >>= booth_unit_size;
5443 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5444 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
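
/* The loop above consumes the multiplier in 2-bit Booth chunks (its
   elided body also adds to COST on each step), so the estimate grows
   with the number of chunks needed: multiplying by 0x3 finishes in one
   step, while 0x55, whose set bits span four chunks, takes four.  */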
5448 *total = arm_rtx_costs_1 (x, code, outer_code);
5454 /* RTX cost for cores with a fast multiply unit (M variants). */
5457 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5459 enum machine_mode mode = GET_MODE (x);
5463 *total = thumb1_rtx_costs (x, code, outer_code);
5467 /* ??? should thumb2 use different costs? */
5471 /* There is no point basing this on the tuning, since it is always the
5472 fast variant if it exists at all. */
5474 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5475 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5476 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5483 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5490 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5492 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5493 & (unsigned HOST_WIDE_INT) 0xffffffff);
5494 int cost, const_ok = const_ok_for_arm (i);
5495 int j, booth_unit_size;
5497 /* Tune as appropriate. */
5498 cost = const_ok ? 4 : 8;
5499 booth_unit_size = 8;
5500 for (j = 0; i && j < 32; j += booth_unit_size)
5502 i >>= booth_unit_size;
5510 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5511 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5515 *total = arm_rtx_costs_1 (x, code, outer_code);
5521 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5522 so it can be ignored. */
5525 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5527 enum machine_mode mode = GET_MODE (x);
5531 *total = thumb1_rtx_costs (x, code, outer_code);
5538 /* There is no point basing this on the tuning, since it is always the
5539 fast variant if it exists at all. */
5541 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5542 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5543 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5550 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5557 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5559 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5560 & (unsigned HOST_WIDE_INT) 0xffffffff);
5561 int cost, const_ok = const_ok_for_arm (i);
5562 unsigned HOST_WIDE_INT masked_const;
5564 /* The cost will be related to two insns.
5565 First a load of the constant (MOV or LDR), then a multiply. */
5568 cost += 1; /* LDR is probably more expensive because
5569 of longer result latency. */
5570 masked_const = i & 0xffff8000;
5571 if (masked_const != 0 && masked_const != 0xffff8000)
5573 masked_const = i & 0xf8000000;
5574 if (masked_const == 0 || masked_const == 0xf8000000)
5583 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5584 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5588 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5589 will stall until the multiplication is complete. */
5590 if (GET_CODE (XEXP (x, 0)) == MULT)
5591 *total = 4 + rtx_cost (XEXP (x, 0), code, true);
5593 *total = arm_rtx_costs_1 (x, code, outer_code);
5597 *total = arm_rtx_costs_1 (x, code, outer_code);
5603 /* RTX costs for 9e (and later) cores. */
5606 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5608 enum machine_mode mode = GET_MODE (x);
5617 *total = COSTS_N_INSNS (3);
5621 *total = thumb1_rtx_costs (x, code, outer_code);
5629 /* There is no point basing this on the tuning, since it is always the
5630 fast variant if it exists at all. */
5632 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5633 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5634 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5641 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5658 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5659 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5663 *total = arm_rtx_costs_1 (x, code, outer_code);
5667 /* All address computations that can be done are free, but rtx cost returns
5668 the same for practically all of them. So we weight the different types
5669 of address here in the order (most pref first):
5670 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5672 arm_arm_address_cost (rtx x)
5674 enum rtx_code c = GET_CODE (x);
5676 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5678 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5681 if (c == PLUS || c == MINUS)
5683 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5686 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5696 arm_thumb_address_cost (rtx x)
5698 enum rtx_code c = GET_CODE (x);
5703 && GET_CODE (XEXP (x, 0)) == REG
5704 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5711 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
5713 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5717 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5721 /* Some true dependencies can have a higher cost depending
5722 on precisely how certain input operands are used. */
5724 && REG_NOTE_KIND (link) == 0
5725 && recog_memoized (insn) >= 0
5726 && recog_memoized (dep) >= 0)
5728 int shift_opnum = get_attr_shift (insn);
5729 enum attr_type attr_type = get_attr_type (dep);
5731 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5732 operand for INSN. If we have a shifted input operand and the
5733 instruction we depend on is another ALU instruction, then we may
5734 have to account for an additional stall. */
5735 if (shift_opnum != 0
5736 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5738 rtx shifted_operand;
5741 /* Get the shifted operand. */
5742 extract_insn (insn);
5743 shifted_operand = recog_data.operand[shift_opnum];
5745 /* Iterate over all the operands in DEP. If we write an operand
5746 that overlaps with SHIFTED_OPERAND, then we must increase the
5747 cost of this dependency. */
5749 preprocess_constraints ();
5750 for (opno = 0; opno < recog_data.n_operands; opno++)
5752 /* We can ignore strict inputs. */
5753 if (recog_data.operand_type[opno] == OP_IN)
5756 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5763 /* XXX This is not strictly true for the FPA. */
5764 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5765 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5768 /* Call insns don't incur a stall, even if they follow a load. */
5769 if (REG_NOTE_KIND (link) == 0
5770 && GET_CODE (insn) == CALL_INSN)
5773 if ((i_pat = single_set (insn)) != NULL
5774 && GET_CODE (SET_SRC (i_pat)) == MEM
5775 && (d_pat = single_set (dep)) != NULL
5776 && GET_CODE (SET_DEST (d_pat)) == MEM)
5778 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5779 /* This is a load after a store; there is no conflict if the load reads
5780 from a cached area. Assume that loads from the stack, and from the
5781 constant pool are cached, and that others will miss. This is a
5782 crude approximation. */
5784 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5785 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5786 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5787 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5794 static int fp_consts_inited = 0;
5796 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5797 static const char * const strings_fp[8] =
5800 "4", "5", "0.5", "10"
5803 static REAL_VALUE_TYPE values_fp[8];
5806 init_fp_table (void)
5811 if (TARGET_VFP)
5812 fp_consts_inited = 1;
5813 else
5814 fp_consts_inited = 8;
5816 for (i = 0; i < fp_consts_inited; i++)
5818 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5823 /* Return TRUE if rtx X is a valid immediate FP constant. */
5825 arm_const_double_rtx (rtx x)
5830 if (!fp_consts_inited)
5833 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5834 if (REAL_VALUE_MINUS_ZERO (r))
5837 for (i = 0; i < fp_consts_inited; i++)
5838 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5844 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5846 neg_const_double_rtx_ok_for_fpa (rtx x)
5851 if (!fp_consts_inited)
5854 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5855 r = REAL_VALUE_NEGATE (r);
5856 if (REAL_VALUE_MINUS_ZERO (r))
5859 for (i = 0; i < 8; i++)
5860 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5867 /* VFPv3 has a fairly wide range of representable immediates, formed from
5868 "quarter-precision" floating-point values. These can be evaluated using this
5869 formula (with ^ for exponentiation):
5871 (-1)^s * n * 2^-r
5873 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5874 16 <= n <= 31 and 0 <= r <= 7.
5876 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5878 - A (most-significant) is the sign bit.
5879 - BCD are the exponent (encoded as r XOR 3).
5880 - EFGH are the mantissa (encoded as n - 16).
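
/* Worked examples of this encoding: 1.0 has s = 0, n = 16, r = 4 and
   encodes as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70; 0.5 (r = 5)
   encodes as 0x60; -31.0 (s = 1, n = 31, r = 0) encodes as 0xbf.  */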
5883 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5884 fconst[sd] instruction, or -1 if X isn't suitable. */
5886 vfp3_const_double_index (rtx x)
5888 REAL_VALUE_TYPE r, m;
5890 unsigned HOST_WIDE_INT mantissa, mant_hi;
5891 unsigned HOST_WIDE_INT mask;
5892 HOST_WIDE_INT m1, m2;
5893 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5895 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5898 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5900 /* We can't represent these things, so detect them first. */
5901 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5904 /* Extract sign, exponent and mantissa. */
5905 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5906 r = REAL_VALUE_ABS (r);
5907 exponent = REAL_EXP (&r);
5908 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5909 highest (sign) bit, with a fixed binary point at bit point_pos.
5910 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5911 bits for the mantissa, this may fail (low bits would be lost). */
5912 real_ldexp (&m, &r, point_pos - exponent);
5913 REAL_VALUE_TO_INT (&m1, &m2, m);
5917 /* If there are bits set in the low part of the mantissa, we can't
5918 represent this value. */
5922 /* Now make it so that mantissa contains the most-significant bits, and move
5923 the point_pos to indicate that the least-significant bits have been
5924 discarded. */
5925 point_pos -= HOST_BITS_PER_WIDE_INT;
5928 /* We can permit four significant bits of mantissa only, plus a high bit
5929 which is always 1. */
5930 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5931 if ((mantissa & mask) != 0)
5934 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5935 mantissa >>= point_pos - 5;
5937 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5938 floating-point immediate zero with Neon using an integer-zero load, but
5939 that case is handled elsewhere.) */
5943 gcc_assert (mantissa >= 16 && mantissa <= 31);
5945 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5946 normalized significands are in the range [1, 2). (Our mantissa is shifted
5947 left 4 places at this point relative to normalized IEEE754 values). GCC
5948 internally uses [0.5, 1) (see real.c), so the exponent returned from
5949 REAL_EXP must be altered. */
5950 exponent = 5 - exponent;
5952 if (exponent < 0 || exponent > 7)
5955 /* Sign, mantissa and exponent are now in the correct form to plug into the
5956 formula described in the comment above. */
5957 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5960 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5962 vfp3_const_double_rtx (rtx x)
5967 return vfp3_const_double_index (x) != -1;
5970 /* Recognize immediates which can be used in various Neon instructions. Legal
5971 immediates are described by the following table (for VMVN variants, the
5972 bitwise inverse of the constant shown is recognized; in either case, VMOV
5973 is output and the correct instruction to use for a given constant is chosen
5974 by the assembler). The constant shown is replicated across all elements of
5975 the destination vector.
5977 insn elems variant constant (binary)
5978 ---- ----- ------- -----------------
5979 vmov i32 0 00000000 00000000 00000000 abcdefgh
5980 vmov i32 1 00000000 00000000 abcdefgh 00000000
5981 vmov i32 2 00000000 abcdefgh 00000000 00000000
5982 vmov i32 3 abcdefgh 00000000 00000000 00000000
5983 vmov i16 4 00000000 abcdefgh
5984 vmov i16 5 abcdefgh 00000000
5985 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5986 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5987 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5988 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5989 vmvn i16 10 00000000 abcdefgh
5990 vmvn i16 11 abcdefgh 00000000
5991 vmov i32 12 00000000 00000000 abcdefgh 11111111
5992 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5993 vmov i32 14 00000000 abcdefgh 11111111 11111111
5994 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5995 vmov i8 16 abcdefgh
5996 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5997 eeeeeeee ffffffff gggggggg hhhhhhhh
5998 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
6000 For case 18, B = !b. Representable values are exactly those accepted by
6001 vfp3_const_double_index, but are output as floating-point numbers rather
6004 Variants 0-5 (inclusive) may also be used as immediates for the second
6005 operand of VORR/VBIC instructions.
6007 The INVERSE argument causes the bitwise inverse of the given operand to be
6008 recognized instead (used for recognizing legal immediates for the VAND/VORN
6009 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
6010 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
6011 output, rather than the real insns vbic/vorr).
6013 INVERSE makes no difference to the recognition of float vectors.
6015 The return value is the variant of immediate as shown in the above table, or
6016 -1 if the given value doesn't match any of the listed patterns.
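
/* For example, a V4SImode vector with all four elements equal to
   0x000000ab matches variant 0: the function returns 0 and sets
   *MODCONST to 0xab and *ELEMENTWIDTH to 32.  */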
6019 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6020 rtx *modconst, int *elementwidth)
6022 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
6024 for (i = 0; i < idx; i += (STRIDE)) \
6029 immtype = (CLASS); \
6030 elsize = (ELSIZE); \
6034 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6035 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6036 unsigned char bytes[16];
6037 int immtype = -1, matches;
6038 unsigned int invmask = inverse ? 0xff : 0;
6040 /* Vectors of float constants. */
6041 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6043 rtx el0 = CONST_VECTOR_ELT (op, 0);
6046 if (!vfp3_const_double_rtx (el0))
6049 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
6051 for (i = 1; i < n_elts; i++)
6053 rtx elt = CONST_VECTOR_ELT (op, i);
6056 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
6058 if (!REAL_VALUES_EQUAL (r0, re))
6063 *modconst = CONST_VECTOR_ELT (op, 0);
6071 /* Splat vector constant out into a byte vector. */
6072 for (i = 0; i < n_elts; i++)
6074 rtx el = CONST_VECTOR_ELT (op, i);
6075 unsigned HOST_WIDE_INT elpart;
6076 unsigned int part, parts;
6078 if (GET_CODE (el) == CONST_INT)
6080 elpart = INTVAL (el);
6083 else if (GET_CODE (el) == CONST_DOUBLE)
6085 elpart = CONST_DOUBLE_LOW (el);
6091 for (part = 0; part < parts; part++)
6094 for (byte = 0; byte < innersize; byte++)
6096 bytes[idx++] = (elpart & 0xff) ^ invmask;
6097 elpart >>= BITS_PER_UNIT;
6099 if (GET_CODE (el) == CONST_DOUBLE)
6100 elpart = CONST_DOUBLE_HIGH (el);
6105 gcc_assert (idx == GET_MODE_SIZE (mode));
6109 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6110 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6112 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6113 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6115 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6116 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6118 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6119 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6121 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6123 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6125 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6126 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6128 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6129 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6131 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6132 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6134 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6135 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6137 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6139 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6141 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6142 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6144 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6145 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6147 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6148 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6150 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6151 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6153 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6155 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6156 && bytes[i] == bytes[(i + 8) % idx]);
6164 *elementwidth = elsize;
6168 unsigned HOST_WIDE_INT imm = 0;
6170 /* Un-invert bytes of recognized vector, if necessary. */
6172 for (i = 0; i < idx; i++)
6173 bytes[i] ^= invmask;
6177 /* FIXME: Broken on 32-bit H_W_I hosts. */
6178 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6180 for (i = 0; i < 8; i++)
6181 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6182 << (i * BITS_PER_UNIT);
6184 *modconst = GEN_INT (imm);
6188 unsigned HOST_WIDE_INT imm = 0;
6190 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6191 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6193 *modconst = GEN_INT (imm);
6201 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6202 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6203 float elements), and a modified constant (whatever should be output for a
6204 VMOV) in *MODCONST. */
6207 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6208 rtx *modconst, int *elementwidth)
6212 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6218 *modconst = tmpconst;
6221 *elementwidth = tmpwidth;
6226 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6227 the immediate is valid, write a constant suitable for using as an operand
6228 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6229 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6232 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6233 rtx *modconst, int *elementwidth)
6237 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6239 if (retval < 0 || retval > 5)
6243 *modconst = tmpconst;
6246 *elementwidth = tmpwidth;
6251 /* Return a string suitable for output of Neon immediate logic operation
6252 MNEM. */
6255 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6256 int inverse, int quad)
6258 int width, is_valid;
6259 static char templ[40];
6261 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6263 gcc_assert (is_valid != 0);
6266 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6268 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6273 /* Output a sequence of pairwise operations to implement a reduction.
6274 NOTE: We do "too much work" here, because pairwise operations work on two
6275 registers-worth of operands in one go. Unfortunately we don't think those
6276 extra calculations can be exploited to do the full operation in fewer steps.
6277 Although all vector elements of the result but the first are ignored, we
6278 actually calculate the same result in each of the elements. An alternative
6279 such as initially loading a vector with zero to use as each of the second
6280 operands would use up an additional register and take an extra instruction,
6281 for no particular gain. */
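
/* For instance, summing a 4-element vector takes log2(4) = 2 pairwise
   steps: [a, b, c, d] -> [a+b, c+d, a+b, c+d] -> [a+b+c+d, ...]; only
   element 0 of the final vector is actually used.  */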
6284 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6285 rtx (*reduc) (rtx, rtx, rtx))
6287 enum machine_mode inner = GET_MODE_INNER (mode);
6288 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6291 for (i = parts / 2; i >= 1; i /= 2)
6293 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6294 emit_insn (reduc (dest, tmpsum, tmpsum));
6299 /* Initialize a vector with non-constant elements. FIXME: We can do better
6300 than the current implementation (building a vector on the stack and then
6301 loading it) in many cases. See rs6000.c. */
6304 neon_expand_vector_init (rtx target, rtx vals)
6306 enum machine_mode mode = GET_MODE (target);
6307 enum machine_mode inner = GET_MODE_INNER (mode);
6308 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6311 gcc_assert (VECTOR_MODE_P (mode));
6313 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6314 for (i = 0; i < n_elts; i++)
6315 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6316 XVECEXP (vals, 0, i));
6318 emit_move_insn (target, mem);
6321 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6322 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6323 reported source locations are bogus. */
6326 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6331 gcc_assert (GET_CODE (operand) == CONST_INT);
6333 lane = INTVAL (operand);
6335 if (lane < low || lane >= high)
6339 /* Bounds-check lanes. */
6342 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6344 bounds_check (operand, low, high, "lane out of range");
6347 /* Bounds-check constants. */
6350 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6352 bounds_check (operand, low, high, "constant out of range");
6356 neon_element_bits (enum machine_mode mode)
6359 return GET_MODE_BITSIZE (mode);
6361 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6365 /* Predicates for `match_operand' and `match_operator'. */
6367 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6369 cirrus_memory_offset (rtx op)
6371 /* Reject eliminable registers. */
6372 if (! (reload_in_progress || reload_completed)
6373 && ( reg_mentioned_p (frame_pointer_rtx, op)
6374 || reg_mentioned_p (arg_pointer_rtx, op)
6375 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6376 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6377 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6378 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6381 if (GET_CODE (op) == MEM)
6387 /* Match: (mem (reg)). */
6388 if (GET_CODE (ind) == REG)
6394 if (GET_CODE (ind) == PLUS
6395 && GET_CODE (XEXP (ind, 0)) == REG
6396 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6397 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6404 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6405 WB is true if full writeback address modes are allowed and is false
6406 if limited writeback address modes (POST_INC and PRE_DEC) are
6407 allowed. */
6410 arm_coproc_mem_operand (rtx op, bool wb)
6414 /* Reject eliminable registers. */
6415 if (! (reload_in_progress || reload_completed)
6416 && ( reg_mentioned_p (frame_pointer_rtx, op)
6417 || reg_mentioned_p (arg_pointer_rtx, op)
6418 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6419 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6420 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6421 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6424 /* Constants are converted into offsets from labels. */
6425 if (GET_CODE (op) != MEM)
6430 if (reload_completed
6431 && (GET_CODE (ind) == LABEL_REF
6432 || (GET_CODE (ind) == CONST
6433 && GET_CODE (XEXP (ind, 0)) == PLUS
6434 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6435 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6438 /* Match: (mem (reg)). */
6439 if (GET_CODE (ind) == REG)
6440 return arm_address_register_rtx_p (ind, 0);
6442 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
6443 acceptable in any case (subject to verification by
6444 arm_address_register_rtx_p). We need WB to be true to accept
6445 PRE_INC and POST_DEC. */
6446 if (GET_CODE (ind) == POST_INC
6447 || GET_CODE (ind) == PRE_DEC
6449 && (GET_CODE (ind) == PRE_INC
6450 || GET_CODE (ind) == POST_DEC)))
6451 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6454 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6455 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6456 && GET_CODE (XEXP (ind, 1)) == PLUS
6457 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6458 ind = XEXP (ind, 1);
6463 if (GET_CODE (ind) == PLUS
6464 && GET_CODE (XEXP (ind, 0)) == REG
6465 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6466 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6467 && INTVAL (XEXP (ind, 1)) > -1024
6468 && INTVAL (XEXP (ind, 1)) < 1024
6469 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6475 /* Return TRUE if OP is a memory operand which we can load or store a vector
6476 to/from. If CORE is true, we're moving from ARM registers not Neon
6477 registers. */
6479 neon_vector_mem_operand (rtx op, bool core)
6483 /* Reject eliminable registers. */
6484 if (! (reload_in_progress || reload_completed)
6485 && ( reg_mentioned_p (frame_pointer_rtx, op)
6486 || reg_mentioned_p (arg_pointer_rtx, op)
6487 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6488 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6489 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6490 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6493 /* Constants are converted into offsets from labels. */
6494 if (GET_CODE (op) != MEM)
6499 if (reload_completed
6500 && (GET_CODE (ind) == LABEL_REF
6501 || (GET_CODE (ind) == CONST
6502 && GET_CODE (XEXP (ind, 0)) == PLUS
6503 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6504 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6507 /* Match: (mem (reg)). */
6508 if (GET_CODE (ind) == REG)
6509 return arm_address_register_rtx_p (ind, 0);
6511 /* Allow post-increment with Neon registers. */
6512 if (!core && GET_CODE (ind) == POST_INC)
6513 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6516 /* FIXME: We can support this too if we use VLD1/VST1. */
6518 && GET_CODE (ind) == POST_MODIFY
6519 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6520 && GET_CODE (XEXP (ind, 1)) == PLUS
6521 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6522 ind = XEXP (ind, 1);
6529 && GET_CODE (ind) == PLUS
6530 && GET_CODE (XEXP (ind, 0)) == REG
6531 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6532 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6533 && INTVAL (XEXP (ind, 1)) > -1024
6534 && INTVAL (XEXP (ind, 1)) < 1016
6535 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6541 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6542 type. */
6544 neon_struct_mem_operand (rtx op)
6548 /* Reject eliminable registers. */
6549 if (! (reload_in_progress || reload_completed)
6550 && ( reg_mentioned_p (frame_pointer_rtx, op)
6551 || reg_mentioned_p (arg_pointer_rtx, op)
6552 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6553 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6554 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6555 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6558 /* Constants are converted into offsets from labels. */
6559 if (GET_CODE (op) != MEM)
6564 if (reload_completed
6565 && (GET_CODE (ind) == LABEL_REF
6566 || (GET_CODE (ind) == CONST
6567 && GET_CODE (XEXP (ind, 0)) == PLUS
6568 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6569 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6572 /* Match: (mem (reg)). */
6573 if (GET_CODE (ind) == REG)
6574 return arm_address_register_rtx_p (ind, 0);
6579 /* Return true if X is a register that will be eliminated later on. */
6581 arm_eliminable_register (rtx x)
6583 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6584 || REGNO (x) == ARG_POINTER_REGNUM
6585 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6586 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6589 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
6590 coprocessor registers. Otherwise return NO_REGS. */
6593 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6596 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6597 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6598 && neon_vector_mem_operand (x, FALSE))
6601 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6604 return GENERAL_REGS;
6607 /* Values which must be returned in the most-significant end of the return register.  */
6611 arm_return_in_msb (const_tree valtype)
6613 return (TARGET_AAPCS_BASED
6615 && (AGGREGATE_TYPE_P (valtype)
6616 || TREE_CODE (valtype) == COMPLEX_TYPE));
6619 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6620    Used by the Cirrus Maverick code, which has to work around
6621    a hardware bug triggered by such instructions.  */
6623 arm_memory_load_p (rtx insn)
6625 rtx body, lhs, rhs;
6627 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6630 body = PATTERN (insn);
6632 if (GET_CODE (body) != SET)
6635 lhs = XEXP (body, 0);
6636 rhs = XEXP (body, 1);
6638 lhs = REG_OR_SUBREG_RTX (lhs);
6640 /* If the destination is not a general purpose
6641 register we do not have to worry. */
6642 if (GET_CODE (lhs) != REG
6643 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6646 /* As well as loads from memory we also have to react
6647 to loads of invalid constants which will be turned
6648 into loads from the minipool. */
6649 return (GET_CODE (rhs) == MEM
6650 || GET_CODE (rhs) == SYMBOL_REF
6651 || note_invalid_constants (insn, -1, false));
6654 /* Return TRUE if INSN is a Cirrus instruction. */
6656 arm_cirrus_insn_p (rtx insn)
6658 enum attr_cirrus attr;
6660 /* get_attr cannot accept USE or CLOBBER. */
6662 || GET_CODE (insn) != INSN
6663 || GET_CODE (PATTERN (insn)) == USE
6664 || GET_CODE (PATTERN (insn)) == CLOBBER)
6667 attr = get_attr_cirrus (insn);
6669 return attr != CIRRUS_NOT;
6672 /* Cirrus reorg for invalid instruction combinations. */
6674 cirrus_reorg (rtx first)
6676 enum attr_cirrus attr;
6677 rtx body = PATTERN (first);
6681 /* Any branch must be followed by 2 non-Cirrus instructions.  */
6682 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6685 t = next_nonnote_insn (first);
6687 if (arm_cirrus_insn_p (t))
6690 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6694 emit_insn_after (gen_nop (), first);
6699 /* (float (blah)) is in parallel with a clobber. */
6700 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6701 body = XVECEXP (body, 0, 0);
6703 if (GET_CODE (body) == SET)
6705 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6707 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6708    be followed by a non-Cirrus insn.  */
6709 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6711 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6712 emit_insn_after (gen_nop (), first);
6716 else if (arm_memory_load_p (first))
6718 unsigned int arm_regno;
6720 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6721 ldr/cfmv64hr combination where the Rd field is the same
6722    in both instructions must be split with a non-Cirrus
6729 /* Get Arm register number for ldr insn. */
6730 if (GET_CODE (lhs) == REG)
6731 arm_regno = REGNO (lhs);
6734 gcc_assert (GET_CODE (rhs) == REG);
6735 arm_regno = REGNO (rhs);
6739 first = next_nonnote_insn (first);
6741 if (! arm_cirrus_insn_p (first))
6744 body = PATTERN (first);
6746 /* (float (blah)) is in parallel with a clobber. */
6747 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6748 body = XVECEXP (body, 0, 0);
6750 if (GET_CODE (body) == FLOAT)
6751 body = XEXP (body, 0);
6753 if (get_attr_cirrus (first) == CIRRUS_MOVE
6754 && GET_CODE (XEXP (body, 1)) == REG
6755 && arm_regno == REGNO (XEXP (body, 1)))
6756 emit_insn_after (gen_nop (), first);
6762 /* get_attr cannot accept USE or CLOBBER. */
6764 || GET_CODE (first) != INSN
6765 || GET_CODE (PATTERN (first)) == USE
6766 || GET_CODE (PATTERN (first)) == CLOBBER)
6769 attr = get_attr_cirrus (first);
6771 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6772 must be followed by a non-coprocessor instruction. */
6773 if (attr == CIRRUS_COMPARE)
6777 t = next_nonnote_insn (first);
6779 if (arm_cirrus_insn_p (t))
6782 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6786 emit_insn_after (gen_nop (), first);
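/* Illustration (standalone, a loose model only; all names hypothetical):
   the essence of the workaround above is "after a branch, the next two
   issued insns must not be Cirrus insns; emit nops until that holds".
   Insns are modeled as plain ints: 1 = branch, 2 = Cirrus insn,
   0 = anything else / nop.  OUT needs room for N plus two slots per
   branch; returns the output length.  */
static int
pad_after_branches (const int *insns, int n, int *out)
{
  int i = 0, m = 0, guard = 0;

  while (i < n)
    {
      if (guard > 0 && insns[i] == 2)
        {
          out[m++] = 0;     /* A nop keeps the Cirrus insn out of the
                               branch shadow.  */
          guard--;
          continue;
        }
      out[m++] = insns[i];
      guard = insns[i] == 1 ? 2 : (guard > 0 ? guard - 1 : 0);
      i++;
    }
  return m;
}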
6792 /* Return TRUE if X references a SYMBOL_REF. */
6794 symbol_mentioned_p (rtx x)
6799 if (GET_CODE (x) == SYMBOL_REF)
6802 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6803 are constant offsets, not symbols. */
6804 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6807 fmt = GET_RTX_FORMAT (GET_CODE (x));
6809 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6815 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6816 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6819 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6826 /* Return TRUE if X references a LABEL_REF. */
6828 label_mentioned_p (rtx x)
6833 if (GET_CODE (x) == LABEL_REF)
6836 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6837 instruction, but they are constant offsets, not symbols. */
6838 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6841 fmt = GET_RTX_FORMAT (GET_CODE (x));
6842 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6848 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6849 if (label_mentioned_p (XVECEXP (x, i, j)))
6852 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
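/* Illustration (standalone; all names hypothetical): symbol_mentioned_p
   and label_mentioned_p above are the classic recursive walk driven by
   RTL's format strings, where 'e' means "one subexpression" and 'E'
   means "a vector of subexpressions".  The same shape over a toy
   expression tree:  */
struct toy_expr
{
  int is_target;            /* Nonzero for the leaf kind being sought.  */
  int n_kids;
  struct toy_expr **kids;
};

static int
toy_mentioned_p (const struct toy_expr *x)
{
  int i;

  if (x == NULL)
    return 0;
  if (x->is_target)
    return 1;
  for (i = x->n_kids - 1; i >= 0; i--)
    if (toy_mentioned_p (x->kids[i]))
      return 1;
  return 0;
}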
6860 tls_mentioned_p (rtx x)
6862 switch (GET_CODE (x))
6865 return tls_mentioned_p (XEXP (x, 0));
6868 if (XINT (x, 1) == UNSPEC_TLS)
6876 /* Must not copy a SET whose source operand is PC-relative. */
6879 arm_cannot_copy_insn_p (rtx insn)
6881 rtx pat = PATTERN (insn);
6883 if (GET_CODE (pat) == SET)
6885 rtx rhs = SET_SRC (pat);
6887 if (GET_CODE (rhs) == UNSPEC
6888 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6891 if (GET_CODE (rhs) == MEM
6892 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6893 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6903 enum rtx_code code = GET_CODE (x);
6920 /* Return 1 if memory locations are adjacent. */
6922 adjacent_mem_locations (rtx a, rtx b)
6924 /* We don't guarantee to preserve the order of these memory refs. */
6925 if (volatile_refs_p (a) || volatile_refs_p (b))
6928 if ((GET_CODE (XEXP (a, 0)) == REG
6929 || (GET_CODE (XEXP (a, 0)) == PLUS
6930 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6931 && (GET_CODE (XEXP (b, 0)) == REG
6932 || (GET_CODE (XEXP (b, 0)) == PLUS
6933 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6935 HOST_WIDE_INT val0 = 0, val1 = 0;
6939 if (GET_CODE (XEXP (a, 0)) == PLUS)
6941 reg0 = XEXP (XEXP (a, 0), 0);
6942 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6947 if (GET_CODE (XEXP (b, 0)) == PLUS)
6949 reg1 = XEXP (XEXP (b, 0), 0);
6950 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6955 /* Don't accept any offset that will require multiple
6956 instructions to handle, since this would cause the
6957 arith_adjacentmem pattern to output an overlong sequence. */
6958 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6961 /* Don't allow an eliminable register: register elimination can make
6962 the offset too large. */
6963 if (arm_eliminable_register (reg0))
6966 val_diff = val1 - val0;
6970 /* If the target has load delay slots, then there's no benefit
6971 to using an ldm instruction unless the offset is zero and
6972 we are optimizing for size. */
6973 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6974 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6975 && (val_diff == 4 || val_diff == -4));
6978 return ((REGNO (reg0) == REGNO (reg1))
6979 && (val_diff == 4 || val_diff == -4));
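/* Illustration (standalone, hypothetical helper): the decision above
   reduces each address to a (base register, constant offset) pair and
   merges the two accesses only when the bases match and the offsets
   differ by exactly one word.  */
static int
words_are_adjacent (int base0, long val0, int base1, long val1)
{
  long diff = val1 - val0;

  return base0 == base1 && (diff == 4 || diff == -4);
}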
6986 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6987 HOST_WIDE_INT *load_offset)
6989 int unsorted_regs[4];
6990 HOST_WIDE_INT unsorted_offsets[4];
6995 /* Can only handle 2, 3, or 4 insns at present,
6996 though could be easily extended if required. */
6997 gcc_assert (nops >= 2 && nops <= 4);
6999 /* Loop over the operands and check that the memory references are
7000 suitable (i.e. immediate offsets from the same base register). At
7001    the same time, extract the target register, and the memory offsets.  */
7003 for (i = 0; i < nops; i++)
7008 /* Convert a subreg of a mem into the mem itself. */
7009 if (GET_CODE (operands[nops + i]) == SUBREG)
7010 operands[nops + i] = alter_subreg (operands + (nops + i));
7012 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7014 /* Don't reorder volatile memory references; it doesn't seem worth
7015 looking for the case where the order is ok anyway. */
7016 if (MEM_VOLATILE_P (operands[nops + i]))
7019 offset = const0_rtx;
7021 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7022 || (GET_CODE (reg) == SUBREG
7023 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7024 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7025 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7027 || (GET_CODE (reg) == SUBREG
7028 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7029 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7034 base_reg = REGNO (reg);
7035 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7036 ? REGNO (operands[i])
7037 : REGNO (SUBREG_REG (operands[i])));
7042 if (base_reg != (int) REGNO (reg))
7043 /* Not addressed from the same base register. */
7046 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7047 ? REGNO (operands[i])
7048 : REGNO (SUBREG_REG (operands[i])));
7049 if (unsorted_regs[i] < unsorted_regs[order[0]])
7053 /* If it isn't an integer register, or if it overwrites the
7054 base register but isn't the last insn in the list, then
7055 we can't do this. */
7056 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
7057 || (i != nops - 1 && unsorted_regs[i] == base_reg))
7060 unsorted_offsets[i] = INTVAL (offset);
7063 /* Not a suitable memory address. */
7067 /* All the useful information has now been extracted from the
7068 operands into unsorted_regs and unsorted_offsets; additionally,
7069 order[0] has been set to the lowest numbered register in the
7070 list. Sort the registers into order, and check that the memory
7071 offsets are ascending and adjacent. */
7073 for (i = 1; i < nops; i++)
7077 order[i] = order[i - 1];
7078 for (j = 0; j < nops; j++)
7079 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7080 && (order[i] == order[i - 1]
7081 || unsorted_regs[j] < unsorted_regs[order[i]]))
7084 /* Have we found a suitable register?  If not, one must be used more than once.  */
7086 if (order[i] == order[i - 1])
7089 /* Is the memory address adjacent and ascending? */
7090 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7098 for (i = 0; i < nops; i++)
7099 regs[i] = unsorted_regs[order[i]];
7101 *load_offset = unsorted_offsets[order[0]];
7104 if (unsorted_offsets[order[0]] == 0)
7105 return 1; /* ldmia */
7107 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7108 return 2; /* ldmib */
7110 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7111 return 3; /* ldmda */
7113 if (unsorted_offsets[order[nops - 1]] == -4)
7114 return 4; /* ldmdb */
7116 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7117 if the offset isn't small enough. The reason 2 ldrs are faster
7118 is because these ARMs are able to do more than one cache access
7119 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7120 whilst the ARM8 has a double bandwidth cache. This means that
7121 these cores can do both an instruction fetch and a data fetch in
7122 a single cycle, so the trick of calculating the address into a
7123 scratch register (one of the result regs) and then doing a load
7124 multiple actually becomes slower (and no smaller in code size).
7125 That is the transformation
7127 ldr rd1, [rbase + offset]
7128 ldr rd2, [rbase + offset + 4]
7132 add rd1, rbase, offset
7133 ldmia rd1, {rd1, rd2}
7135 produces worse code -- '3 cycles + any stalls on rd2' instead of
7136 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7137 access per cycle, the first sequence could never complete in less
7138 than 6 cycles, whereas the ldm sequence would only take 5 and
7139 would make better use of sequential accesses if not hitting the
7142 We cheat here and test 'arm_ld_sched' which we currently know to
7143 only be true for the ARM8, ARM9 and StrongARM. If this ever
7144 changes, then the test below needs to be reworked. */
7145 if (nops == 2 && arm_ld_sched)
7148 /* Can't do it without setting up the offset, only do this if it takes
7149 no more than one insn. */
7150 return (const_ok_for_arm (unsorted_offsets[order[0]])
7151 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
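/* Illustration (standalone): the return value of load_multiple_sequence
   encodes which ldm addressing mode fits the ascending, word-adjacent
   offsets, given only the first and last offset.  This sketch omits the
   arm_ld_sched bail-out and the const_ok_for_arm check that guard the
   "set up the base first" case in the real code.  */
static int
classify_ldm (long first_offset, long last_offset, int target_arm)
{
  if (first_offset == 0)
    return 1;                           /* ldmia */
  if (target_arm && first_offset == 4)
    return 2;                           /* ldmib */
  if (target_arm && last_offset == 0)
    return 3;                           /* ldmda */
  if (last_offset == -4)
    return 4;                           /* ldmdb */
  return 5;                             /* add/sub the base, then ldmia */
}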
7155 emit_ldm_seq (rtx *operands, int nops)
7159 HOST_WIDE_INT offset;
7163 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7166 strcpy (buf, "ldm%(ia%)\t");
7170 strcpy (buf, "ldm%(ib%)\t");
7174 strcpy (buf, "ldm%(da%)\t");
7178 strcpy (buf, "ldm%(db%)\t");
7183 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7184 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7187 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7188 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7190 output_asm_insn (buf, operands);
7192 strcpy (buf, "ldm%(ia%)\t");
7199 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7200 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7202 for (i = 1; i < nops; i++)
7203 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7204 reg_names[regs[i]]);
7206 strcat (buf, "}\t%@ phole ldm");
7208 output_asm_insn (buf, operands);
7213 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7214 HOST_WIDE_INT * load_offset)
7216 int unsorted_regs[4];
7217 HOST_WIDE_INT unsorted_offsets[4];
7222 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7223 extended if required. */
7224 gcc_assert (nops >= 2 && nops <= 4);
7226 /* Loop over the operands and check that the memory references are
7227 suitable (i.e. immediate offsets from the same base register). At
7228    the same time, extract the target register, and the memory offsets.  */
7230 for (i = 0; i < nops; i++)
7235 /* Convert a subreg of a mem into the mem itself. */
7236 if (GET_CODE (operands[nops + i]) == SUBREG)
7237 operands[nops + i] = alter_subreg (operands + (nops + i));
7239 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7241 /* Don't reorder volatile memory references; it doesn't seem worth
7242 looking for the case where the order is ok anyway. */
7243 if (MEM_VOLATILE_P (operands[nops + i]))
7246 offset = const0_rtx;
7248 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7249 || (GET_CODE (reg) == SUBREG
7250 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7251 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7252 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7254 || (GET_CODE (reg) == SUBREG
7255 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7256 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7261 base_reg = REGNO (reg);
7262 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7263 ? REGNO (operands[i])
7264 : REGNO (SUBREG_REG (operands[i])));
7269 if (base_reg != (int) REGNO (reg))
7270 /* Not addressed from the same base register. */
7273 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7274 ? REGNO (operands[i])
7275 : REGNO (SUBREG_REG (operands[i])));
7276 if (unsorted_regs[i] < unsorted_regs[order[0]])
7280 /* If it isn't an integer register, then we can't do this. */
7281 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7284 unsorted_offsets[i] = INTVAL (offset);
7287 /* Not a suitable memory address. */
7291 /* All the useful information has now been extracted from the
7292 operands into unsorted_regs and unsorted_offsets; additionally,
7293 order[0] has been set to the lowest numbered register in the
7294 list. Sort the registers into order, and check that the memory
7295 offsets are ascending and adjacent. */
7297 for (i = 1; i < nops; i++)
7301 order[i] = order[i - 1];
7302 for (j = 0; j < nops; j++)
7303 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7304 && (order[i] == order[i - 1]
7305 || unsorted_regs[j] < unsorted_regs[order[i]]))
7308 /* Have we found a suitable register?  If not, one must be used more than once.  */
7310 if (order[i] == order[i - 1])
7313 /* Is the memory address adjacent and ascending? */
7314 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7322 for (i = 0; i < nops; i++)
7323 regs[i] = unsorted_regs[order[i]];
7325 *load_offset = unsorted_offsets[order[0]];
7328 if (unsorted_offsets[order[0]] == 0)
7329 return 1; /* stmia */
7331 if (unsorted_offsets[order[0]] == 4)
7332 return 2; /* stmib */
7334 if (unsorted_offsets[order[nops - 1]] == 0)
7335 return 3; /* stmda */
7337 if (unsorted_offsets[order[nops - 1]] == -4)
7338 return 4; /* stmdb */
7344 emit_stm_seq (rtx *operands, int nops)
7348 HOST_WIDE_INT offset;
7352 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7355 strcpy (buf, "stm%(ia%)\t");
7359 strcpy (buf, "stm%(ib%)\t");
7363 strcpy (buf, "stm%(da%)\t");
7367 strcpy (buf, "stm%(db%)\t");
7374 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7375 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7377 for (i = 1; i < nops; i++)
7378 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7379 reg_names[regs[i]]);
7381 strcat (buf, "}\t%@ phole stm");
7383 output_asm_insn (buf, operands);
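/* Illustration (standalone; assumes <stdio.h> and <string.h>, helper
   name hypothetical): both emitters above build their assembly text the
   same way -- mnemonic, base register, then the sorted register list in
   braces.  Buffer sizing is the caller's responsibility, as in the real
   code.  */
static void
format_multi_transfer (char *buf, const char *mnemonic, int base_reg,
                       const int *regs, int nregs)
{
  int i;

  sprintf (buf, "%s\tr%d, {r%d", mnemonic, base_reg, regs[0]);
  for (i = 1; i < nregs; i++)
    sprintf (buf + strlen (buf), ", r%d", regs[i]);
  strcat (buf, "}");
}

/* For example, with regs = {4, 5, 6},
   format_multi_transfer (buf, "stmia", 13, regs, 3)
   produces "stmia\tr13, {r4, r5, r6}".  */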
7387 /* Routines for use in generating RTL. */
7390 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7391 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7393 HOST_WIDE_INT offset = *offsetp;
7396 int sign = up ? 1 : -1;
7399 /* XScale has load-store double instructions, but they have stricter
7400 alignment requirements than load-store multiple, so we cannot
7403 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7404 the pipeline until completion.
7412 An ldr instruction takes 1-3 cycles, but does not block the
7421 Best case ldr will always win. However, the more ldr instructions
7422 we issue, the less likely we are to be able to schedule them well.
7423 Using ldr instructions also increases code size.
7425 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7426 for counts of 3 or 4 regs. */
7427 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7433 for (i = 0; i < count; i++)
7435 addr = plus_constant (from, i * 4 * sign);
7436 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7437 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7443 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7453 result = gen_rtx_PARALLEL (VOIDmode,
7454 rtvec_alloc (count + (write_back ? 1 : 0)));
7457 XVECEXP (result, 0, 0)
7458 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7463 for (j = 0; i < count; i++, j++)
7465 addr = plus_constant (from, j * 4 * sign);
7466 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7467 XVECEXP (result, 0, i)
7468 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7479 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7480 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7482 HOST_WIDE_INT offset = *offsetp;
7485 int sign = up ? 1 : -1;
7488 /* See arm_gen_load_multiple for discussion of
7489 the pros/cons of ldm/stm usage for XScale. */
7490 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7496 for (i = 0; i < count; i++)
7498 addr = plus_constant (to, i * 4 * sign);
7499 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7500 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7506 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7516 result = gen_rtx_PARALLEL (VOIDmode,
7517 rtvec_alloc (count + (write_back ? 1 : 0)));
7520 XVECEXP (result, 0, 0)
7521 = gen_rtx_SET (VOIDmode, to,
7522 plus_constant (to, count * 4 * sign));
7527 for (j = 0; i < count; i++, j++)
7529 addr = plus_constant (to, j * 4 * sign);
7530 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7531 XVECEXP (result, 0, i)
7532 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
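/* Illustration (standalone, hypothetical helper): the XScale discussion
   in arm_gen_load_multiple boils down to a simple cost comparison --
   ldm takes roughly 2 + NREGS cycles and blocks the pipeline, while
   each ldr costs about one issue cycle -- so ldm/stm only pays off for
   three or more registers unless we are optimizing for size.  */
static int
use_multi_transfer (int nregs, int tune_xscale, int optimize_for_size)
{
  if (!tune_xscale || optimize_for_size)
    return 1;                 /* ldm/stm is never larger; take it.  */
  return nregs > 2;           /* Separate ldrs schedule better below 3.  */
}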
7543 arm_gen_movmemqi (rtx *operands)
7545 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7546 HOST_WIDE_INT srcoffset, dstoffset;
7548 rtx src, dst, srcbase, dstbase;
7549 rtx part_bytes_reg = NULL;
7552 if (GET_CODE (operands[2]) != CONST_INT
7553 || GET_CODE (operands[3]) != CONST_INT
7554 || INTVAL (operands[2]) > 64
7555 || INTVAL (operands[3]) & 3)
7558 dstbase = operands[0];
7559 srcbase = operands[1];
7561 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7562 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7564 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7565 out_words_to_go = INTVAL (operands[2]) / 4;
7566 last_bytes = INTVAL (operands[2]) & 3;
7567 dstoffset = srcoffset = 0;
7569 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7570 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7572 for (i = 0; in_words_to_go >= 2; i += 4)
7574 if (in_words_to_go > 4)
7575 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7576 srcbase, &srcoffset));
7578 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7579 FALSE, srcbase, &srcoffset));
7581 if (out_words_to_go)
7583 if (out_words_to_go > 4)
7584 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7585 dstbase, &dstoffset));
7586 else if (out_words_to_go != 1)
7587 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7591 dstbase, &dstoffset));
7594 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7595 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7596 if (last_bytes != 0)
7598 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7604 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7605 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7608 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7609 if (out_words_to_go)
7613 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7614 sreg = copy_to_reg (mem);
7616 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7617 emit_move_insn (mem, sreg);
7620 gcc_assert (!in_words_to_go); /* Sanity check */
7625 gcc_assert (in_words_to_go > 0);
7627 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7628 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7631 gcc_assert (!last_bytes || part_bytes_reg);
7633 if (BYTES_BIG_ENDIAN && last_bytes)
7635 rtx tmp = gen_reg_rtx (SImode);
7637 /* The bytes we want are in the top end of the word. */
7638 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7639 GEN_INT (8 * (4 - last_bytes))));
7640 part_bytes_reg = tmp;
7644 mem = adjust_automodify_address (dstbase, QImode,
7645 plus_constant (dst, last_bytes - 1),
7646 dstoffset + last_bytes - 1);
7647 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7651 tmp = gen_reg_rtx (SImode);
7652 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7653 part_bytes_reg = tmp;
7662 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7663 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7667 rtx tmp = gen_reg_rtx (SImode);
7668 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7669 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7670 part_bytes_reg = tmp;
7677 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7678 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
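/* Illustration (standalone; assumes <string.h>): the overall shape of
   arm_gen_movmemqi is "copy whole words while we can, then finish the
   last one to three bytes individually".  */
static void
copy_words_then_bytes (unsigned char *dst, const unsigned char *src,
                       unsigned long nbytes)
{
  unsigned long words = nbytes / 4;
  unsigned long tail = nbytes & 3;
  unsigned long i;

  for (i = 0; i < words; i++)           /* The ldm/stm-able part.  */
    memcpy (dst + 4 * i, src + 4 * i, 4);
  for (i = 0; i < tail; i++)            /* Trailing 1-3 bytes.  */
    dst[4 * words + i] = src[4 * words + i];
}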
7685 /* Select a dominance comparison mode if possible for a test of the general
7686 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7687 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7688 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7689 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7690 In all cases OP will be either EQ or NE, but we don't need to know which
7691 here. If we are unable to support a dominance comparison we return
7692 CC mode. This will then fail to match for the RTL expressions that
7693 generate this call. */
7695 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7697 enum rtx_code cond1, cond2;
7700 /* Currently we will probably get the wrong result if the individual
7701 comparisons are not simple. This also ensures that it is safe to
7702 reverse a comparison if necessary. */
7703 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7705 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7709 /* The if_then_else variant of this tests the second condition if the
7710 first passes, but is true if the first fails. Reverse the first
7711 condition to get a true "inclusive-or" expression. */
7712 if (cond_or == DOM_CC_NX_OR_Y)
7713 cond1 = reverse_condition (cond1);
7715 /* If the comparisons are not equal, and one doesn't dominate the other,
7716 then we can't do this. */
7718 && !comparison_dominates_p (cond1, cond2)
7719 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7724 enum rtx_code temp = cond1;
7732 if (cond_or == DOM_CC_X_AND_Y)
7737 case EQ: return CC_DEQmode;
7738 case LE: return CC_DLEmode;
7739 case LEU: return CC_DLEUmode;
7740 case GE: return CC_DGEmode;
7741 case GEU: return CC_DGEUmode;
7742 default: gcc_unreachable ();
7746 if (cond_or == DOM_CC_X_AND_Y)
7762 if (cond_or == DOM_CC_X_AND_Y)
7778 if (cond_or == DOM_CC_X_AND_Y)
7794 if (cond_or == DOM_CC_X_AND_Y)
7809 /* The remaining cases only occur when both comparisons are the
7812 gcc_assert (cond1 == cond2);
7816 gcc_assert (cond1 == cond2);
7820 gcc_assert (cond1 == cond2);
7824 gcc_assert (cond1 == cond2);
7828 gcc_assert (cond1 == cond2);
7837 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7839 /* All floating point compares return CCFP if it is an equality
7840 comparison, and CCFPE otherwise. */
7841 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7861 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7870 /* A compare with a shifted operand. Because of canonicalization, the
7871 comparison will have to be swapped when we emit the assembler. */
7872 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7873 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7874 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7875 || GET_CODE (x) == ROTATERT))
7878 /* This operation is performed swapped, but since we only rely on the Z
7879 flag we don't need an additional mode. */
7880 if (GET_MODE (y) == SImode && REG_P (y)
7881 && GET_CODE (x) == NEG
7882 && (op == EQ || op == NE))
7885 /* This is a special case that is used by combine to allow a
7886 comparison of a shifted byte load to be split into a zero-extend
7887 followed by a comparison of the shifted integer (only valid for
7888 equalities and unsigned inequalities). */
7889 if (GET_MODE (x) == SImode
7890 && GET_CODE (x) == ASHIFT
7891 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7892 && GET_CODE (XEXP (x, 0)) == SUBREG
7893 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7894 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7895 && (op == EQ || op == NE
7896 || op == GEU || op == GTU || op == LTU || op == LEU)
7897 && GET_CODE (y) == CONST_INT)
7900 /* A construct for a conditional compare, if the false arm contains
7901 0, then both conditions must be true, otherwise either condition
7902 must be true. Not all conditions are possible, so CCmode is
7903 returned if it can't be done. */
7904 if (GET_CODE (x) == IF_THEN_ELSE
7905 && (XEXP (x, 2) == const0_rtx
7906 || XEXP (x, 2) == const1_rtx)
7907 && COMPARISON_P (XEXP (x, 0))
7908 && COMPARISON_P (XEXP (x, 1)))
7909 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7910 INTVAL (XEXP (x, 2)));
7912 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7913 if (GET_CODE (x) == AND
7914 && COMPARISON_P (XEXP (x, 0))
7915 && COMPARISON_P (XEXP (x, 1)))
7916 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7919 if (GET_CODE (x) == IOR
7920 && COMPARISON_P (XEXP (x, 0))
7921 && COMPARISON_P (XEXP (x, 1)))
7922 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7925 /* An operation (on Thumb) where we want to test for a single bit.
7926 This is done by shifting that bit up into the top bit of a
7927 scratch register; we can then branch on the sign bit. */
7929 && GET_MODE (x) == SImode
7930 && (op == EQ || op == NE)
7931 && GET_CODE (x) == ZERO_EXTRACT
7932 && XEXP (x, 1) == const1_rtx)
7935 /* For an operation that sets the condition codes as a side-effect, the
7936    V flag is not set correctly, so we can only use comparisons where
7937    this doesn't matter.  (For LT and GE we can use "mi" and "pl" instead.)  */
7939 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7940 if (GET_MODE (x) == SImode
7942 && (op == EQ || op == NE || op == LT || op == GE)
7943 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7944 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7945 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7946 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7947 || GET_CODE (x) == LSHIFTRT
7948 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7949 || GET_CODE (x) == ROTATERT
7950 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7953 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7956 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7957 && GET_CODE (x) == PLUS
7958 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7964 /* X and Y are two things to compare using CODE. Emit the compare insn and
7965 return the rtx for register 0 in the proper mode. FP means this is a
7966 floating point compare: I don't think that it is needed on the arm. */
7968 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7970 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7971 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7973 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7978 /* Generate a sequence of insns that will generate the correct return
7979 address mask depending on the physical architecture that the program
7982 arm_gen_return_addr_mask (void)
7984 rtx reg = gen_reg_rtx (Pmode);
7986 emit_insn (gen_return_addr_mask (reg));
7991 arm_reload_in_hi (rtx *operands)
7993 rtx ref = operands[1];
7995 HOST_WIDE_INT offset = 0;
7997 if (GET_CODE (ref) == SUBREG)
7999 offset = SUBREG_BYTE (ref);
8000 ref = SUBREG_REG (ref);
8003 if (GET_CODE (ref) == REG)
8005 /* We have a pseudo which has been spilt onto the stack; there
8006 are two cases here: the first where there is a simple
8007 stack-slot replacement and a second where the stack-slot is
8008 out of range, or is used as a subreg. */
8009 if (reg_equiv_mem[REGNO (ref)])
8011 ref = reg_equiv_mem[REGNO (ref)];
8012 base = find_replacement (&XEXP (ref, 0));
8015 /* The slot is out of range, or was dressed up in a SUBREG. */
8016 base = reg_equiv_address[REGNO (ref)];
8019 base = find_replacement (&XEXP (ref, 0));
8021 /* Handle the case where the address is too complex to be offset by 1. */
8022 if (GET_CODE (base) == MINUS
8023 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8025 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8027 emit_set_insn (base_plus, base);
8030 else if (GET_CODE (base) == PLUS)
8032 /* The addend must be CONST_INT, or we would have dealt with it above. */
8033 HOST_WIDE_INT hi, lo;
8035 offset += INTVAL (XEXP (base, 1));
8036 base = XEXP (base, 0);
8038 /* Rework the address into a legal sequence of insns. */
8039 /* Valid range for lo is -4095 -> 4095 */
8042 : -((-offset) & 0xfff));
8044 /* Corner case, if lo is the max offset then we would be out of range
8045 once we have added the additional 1 below, so bump the msb into the
8046 pre-loading insn(s). */
8050 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8051 ^ (HOST_WIDE_INT) 0x80000000)
8052 - (HOST_WIDE_INT) 0x80000000);
8054 gcc_assert (hi + lo == offset);
8058 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8060 /* Get the base address; addsi3 knows how to handle constants
8061 that require more than one insn. */
8062 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8068 /* Operands[2] may overlap operands[0] (though it won't overlap
8069 operands[1]), that's why we asked for a DImode reg -- so we can
8070 use the bit that does not overlap. */
8071 if (REGNO (operands[2]) == REGNO (operands[0]))
8072 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8074 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8076 emit_insn (gen_zero_extendqisi2 (scratch,
8077 gen_rtx_MEM (QImode,
8078 plus_constant (base,
8080 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
8081 gen_rtx_MEM (QImode,
8082 plus_constant (base,
8084 if (!BYTES_BIG_ENDIAN)
8085 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8086 gen_rtx_IOR (SImode,
8089 gen_rtx_SUBREG (SImode, operands[0], 0),
8093 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8094 gen_rtx_IOR (SImode,
8095 gen_rtx_ASHIFT (SImode, scratch,
8097 gen_rtx_SUBREG (SImode, operands[0], 0)));
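/* Illustration (standalone, using long long in place of HOST_WIDE_INT):
   the address legalization above splits OFFSET into HI + LO so that LO
   fits the single-insn byte-access range (|lo| <= 4095, with headroom
   for the "+ 1" used to reach the second byte) and HI is added into a
   scratch register first.  The mask/xor/subtract dance sign-extends the
   32-bit difference; hi + lo == offset holds for any offset
   representable in 32 bits, which is what the gcc_assert checks.  */
static void
split_reload_offset (long long offset, long long *hi, long long *lo)
{
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);

  if (*lo == 4095)          /* Leave room for the extra "+ 1".  */
    *lo &= 0x7ff;

  *hi = ((((offset - *lo) & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL);
}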
8100 /* Handle storing a half-word to memory during reload by synthesizing as two
8101 byte stores. Take care not to clobber the input values until after we
8102 have moved them somewhere safe. This code assumes that if the DImode
8103 scratch in operands[2] overlaps either the input value or output address
8104 in some way, then that value must die in this insn (we absolutely need
8105 two scratch registers for some corner cases). */
8107 arm_reload_out_hi (rtx *operands)
8109 rtx ref = operands[0];
8110 rtx outval = operands[1];
8112 HOST_WIDE_INT offset = 0;
8114 if (GET_CODE (ref) == SUBREG)
8116 offset = SUBREG_BYTE (ref);
8117 ref = SUBREG_REG (ref);
8120 if (GET_CODE (ref) == REG)
8122 /* We have a pseudo which has been spilt onto the stack; there
8123 are two cases here: the first where there is a simple
8124 stack-slot replacement and a second where the stack-slot is
8125 out of range, or is used as a subreg. */
8126 if (reg_equiv_mem[REGNO (ref)])
8128 ref = reg_equiv_mem[REGNO (ref)];
8129 base = find_replacement (&XEXP (ref, 0));
8132 /* The slot is out of range, or was dressed up in a SUBREG. */
8133 base = reg_equiv_address[REGNO (ref)];
8136 base = find_replacement (&XEXP (ref, 0));
8138 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8140 /* Handle the case where the address is too complex to be offset by 1. */
8141 if (GET_CODE (base) == MINUS
8142 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8144 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8146 /* Be careful not to destroy OUTVAL. */
8147 if (reg_overlap_mentioned_p (base_plus, outval))
8149 /* Updating base_plus might destroy outval, see if we can
8150 swap the scratch and base_plus. */
8151 if (!reg_overlap_mentioned_p (scratch, outval))
8154 scratch = base_plus;
8159 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8161 /* Be conservative and copy OUTVAL into the scratch now,
8162 this should only be necessary if outval is a subreg
8163 of something larger than a word. */
8164 /* XXX Might this clobber base? I can't see how it can,
8165 since scratch is known to overlap with OUTVAL, and
8166 must be wider than a word. */
8167 emit_insn (gen_movhi (scratch_hi, outval));
8168 outval = scratch_hi;
8172 emit_set_insn (base_plus, base);
8175 else if (GET_CODE (base) == PLUS)
8177 /* The addend must be CONST_INT, or we would have dealt with it above. */
8178 HOST_WIDE_INT hi, lo;
8180 offset += INTVAL (XEXP (base, 1));
8181 base = XEXP (base, 0);
8183 /* Rework the address into a legal sequence of insns. */
8184 /* Valid range for lo is -4095 -> 4095 */
8187 : -((-offset) & 0xfff));
8189 /* Corner case, if lo is the max offset then we would be out of range
8190 once we have added the additional 1 below, so bump the msb into the
8191 pre-loading insn(s). */
8195 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8196 ^ (HOST_WIDE_INT) 0x80000000)
8197 - (HOST_WIDE_INT) 0x80000000);
8199 gcc_assert (hi + lo == offset);
8203 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8205 /* Be careful not to destroy OUTVAL. */
8206 if (reg_overlap_mentioned_p (base_plus, outval))
8208 /* Updating base_plus might destroy outval, see if we
8209 can swap the scratch and base_plus. */
8210 if (!reg_overlap_mentioned_p (scratch, outval))
8213 scratch = base_plus;
8218 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8220 /* Be conservative and copy outval into scratch now,
8221 this should only be necessary if outval is a
8222 subreg of something larger than a word. */
8223 /* XXX Might this clobber base? I can't see how it
8224 can, since scratch is known to overlap with
8226 emit_insn (gen_movhi (scratch_hi, outval));
8227 outval = scratch_hi;
8231 /* Get the base address; addsi3 knows how to handle constants
8232 that require more than one insn. */
8233 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8239 if (BYTES_BIG_ENDIAN)
8241 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8242 plus_constant (base, offset + 1)),
8243 gen_lowpart (QImode, outval)));
8244 emit_insn (gen_lshrsi3 (scratch,
8245 gen_rtx_SUBREG (SImode, outval, 0),
8247 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8248 gen_lowpart (QImode, scratch)));
8252 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8253 gen_lowpart (QImode, outval)));
8254 emit_insn (gen_lshrsi3 (scratch,
8255 gen_rtx_SUBREG (SImode, outval, 0),
8257 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8258 plus_constant (base, offset + 1)),
8259 gen_lowpart (QImode, scratch)));
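/* Illustration (standalone): the byte-store tail above writes the
   halfword as two strb's, putting the high byte at the lower address on
   big-endian targets and the low byte there otherwise.  */
static void
store_halfword_bytewise (unsigned char *dst, unsigned int val,
                         int bytes_big_endian)
{
  if (bytes_big_endian)
    {
      dst[0] = (val >> 8) & 0xff;   /* High byte at the low address.  */
      dst[1] = val & 0xff;
    }
  else
    {
      dst[0] = val & 0xff;          /* Low byte at the low address.  */
      dst[1] = (val >> 8) & 0xff;
    }
}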
8263 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8264 (padded to the size of a word) should be passed in a register. */
8267 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8269 if (TARGET_AAPCS_BASED)
8270 return must_pass_in_stack_var_size (mode, type);
8272 return must_pass_in_stack_var_size_or_pad (mode, type);
8276 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8277 Return true if an argument passed on the stack should be padded upwards,
8278 i.e. if the least-significant byte has useful data.
8279 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8280 aggregate types are placed in the lowest memory address. */
8283 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8285 if (!TARGET_AAPCS_BASED)
8286 return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
8288 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8295 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8296 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8297 byte of the register has useful data, and return the opposite if the
8298 most significant byte does.
8299 For AAPCS, small aggregates and small complex types are always padded
8303 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8304 tree type, int first ATTRIBUTE_UNUSED)
8306 if (TARGET_AAPCS_BASED
8308 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8309 && int_size_in_bytes (type) <= 4)
8312 /* Otherwise, use default padding. */
8313 return !BYTES_BIG_ENDIAN;
8317 /* Print a symbolic form of X to the debug file, F. */
8319 arm_print_value (FILE *f, rtx x)
8321 switch (GET_CODE (x))
8324 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8328 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8336 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8338 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8339 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8347 fprintf (f, "\"%s\"", XSTR (x, 0));
8351 fprintf (f, "`%s'", XSTR (x, 0));
8355 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8359 arm_print_value (f, XEXP (x, 0));
8363 arm_print_value (f, XEXP (x, 0));
8365 arm_print_value (f, XEXP (x, 1));
8373 fprintf (f, "????");
8378 /* Routines for manipulation of the constant pool. */
8380 /* Arm instructions cannot load a large constant directly into a
8381 register; they have to come from a pc relative load. The constant
8382 must therefore be placed in the addressable range of the pc
8383 relative load. Depending on the precise pc relative load
8384 instruction the range is somewhere between 256 bytes and 4k. This
8385 means that we often have to dump a constant inside a function, and
8386 generate code to branch around it.
8388 It is important to minimize this, since the branches will slow
8389 things down and make the code larger.
8391 Normally we can hide the table after an existing unconditional
8392 branch so that there is no interruption of the flow, but in the
8393 worst case the code looks like this:
8411 We fix this by performing a scan after scheduling, which notices
8412 which instructions need to have their operands fetched from the
8413 constant table and builds the table.
8415 The algorithm starts by building a table of all the constants that
8416 need fixing up and all the natural barriers in the function (places
8417 where a constant table can be dropped without breaking the flow).
8418 For each fixup we note how far the pc-relative replacement will be
8419 able to reach and the offset of the instruction into the function.
8421 Having built the table we then group the fixes together to form
8422 tables that are as large as possible (subject to addressing
8423 constraints) and emit each table of constants after the last
8424 barrier that is within range of all the instructions in the group.
8425 If a group does not contain a barrier, then we forcibly create one
8426 by inserting a jump instruction into the flow. Once the table has
8427 been inserted, the insns are then modified to reference the
8428 relevant entry in the pool.
8430 Possible enhancements to the algorithm (not implemented) are:
8432 1) For some processors and object formats, there may be benefit in
8433 aligning the pools to the start of cache lines; this alignment
8434 would need to be taken into account when calculating addressability
8437 /* These typedefs are located at the start of this file, so that
8438 they can be used in the prototypes there. This comment is to
8439 remind readers of that fact so that the following structures
8440 can be understood more easily.
8442 typedef struct minipool_node Mnode;
8443 typedef struct minipool_fixup Mfix; */
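/* Illustration (standalone, greatly simplified -- no RTL, no backward
   references, no barriers; all names hypothetical): the grouping step
   described above can be modeled as one pass over fixups sorted by
   address.  Track the earliest "deadline" (address + forward range) in
   the open group and flush a pool whenever the next fixup starts at or
   beyond that deadline.  Returns the number of pools emitted.  */
struct toy_fix
{
  long address;       /* Offset of the insn into the function.  */
  long forwards;      /* How far ahead its pool entry can be placed.  */
};

static int
group_fixups (const struct toy_fix *fixes, int n, long *pool_addresses)
{
  int i, npools = 0;
  long deadline = 0;
  int open = 0;

  for (i = 0; i < n; i++)
    {
      long reach = fixes[i].address + fixes[i].forwards;

      if (open && fixes[i].address >= deadline)
        {
          pool_addresses[npools++] = deadline;  /* Flush the group.  */
          open = 0;
        }
      if (!open || reach < deadline)
        deadline = reach;
      open = 1;
    }
  if (open)
    pool_addresses[npools++] = deadline;
  return npools;
}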
8445 struct minipool_node
8447 /* Doubly linked chain of entries. */
8450 /* The maximum offset into the code that this entry can be placed. While
8451 pushing fixes for forward references, all entries are sorted in order
8452 of increasing max_address. */
8453 HOST_WIDE_INT max_address;
8454 /* Similarly for an entry inserted for a backwards ref. */
8455 HOST_WIDE_INT min_address;
8456 /* The number of fixes referencing this entry. This can become zero
8457 if we "unpush" an entry. In this case we ignore the entry when we
8458 come to emit the code. */
8460 /* The offset from the start of the minipool. */
8461 HOST_WIDE_INT offset;
8462 /* The value in table. */
8464 /* The mode of value. */
8465 enum machine_mode mode;
8466 /* The size of the value. With iWMMXt enabled
8467 sizes > 4 also imply an alignment of 8-bytes. */
8471 struct minipool_fixup
8475 HOST_WIDE_INT address;
8477 enum machine_mode mode;
8481 HOST_WIDE_INT forwards;
8482 HOST_WIDE_INT backwards;
8485 /* Fixes less than a word need padding out to a word boundary. */
8486 #define MINIPOOL_FIX_SIZE(mode) \
8487 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
8489 static Mnode * minipool_vector_head;
8490 static Mnode * minipool_vector_tail;
8491 static rtx minipool_vector_label;
8492 static int minipool_pad;
8494 /* The linked list of all minipool fixes required for this function. */
8495 Mfix * minipool_fix_head;
8496 Mfix * minipool_fix_tail;
8497 /* The fix entry for the current minipool, once it has been placed. */
8498 Mfix * minipool_barrier;
8500 /* Determines if INSN is the start of a jump table. Returns the end
8501 of the TABLE or NULL_RTX. */
8503 is_jump_table (rtx insn)
8507 if (GET_CODE (insn) == JUMP_INSN
8508 && JUMP_LABEL (insn) != NULL
8509 && ((table = next_real_insn (JUMP_LABEL (insn)))
8510 == next_real_insn (insn))
8512 && GET_CODE (table) == JUMP_INSN
8513 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8514 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8520 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8521 #define JUMP_TABLES_IN_TEXT_SECTION 0
8524 static HOST_WIDE_INT
8525 get_jump_table_size (rtx insn)
8527 /* ADDR_VECs only take room if read-only data goes into the text
8529 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8531 rtx body = PATTERN (insn);
8532 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8534 HOST_WIDE_INT modesize;
8536 modesize = GET_MODE_SIZE (GET_MODE (body));
8537 size = modesize * XVECLEN (body, elt);
8541 /* Round up size of TBB table to a halfword boundary. */
8542 size = (size + 1) & ~(HOST_WIDE_INT)1;
8545 /* No padding necessary for TBH. */
8548 /* Add two bytes for alignment on Thumb. */
8561 /* Move a minipool fix MP from its current location to before MAX_MP.
8562 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8563 constraints may need updating. */
8565 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8566 HOST_WIDE_INT max_address)
8568 /* The code below assumes these are different. */
8569 gcc_assert (mp != max_mp);
8573 if (max_address < mp->max_address)
8574 mp->max_address = max_address;
8578 if (max_address > max_mp->max_address - mp->fix_size)
8579 mp->max_address = max_mp->max_address - mp->fix_size;
8581 mp->max_address = max_address;
8583 /* Unlink MP from its current position. Since max_mp is non-null,
8584 mp->prev must be non-null. */
8585 mp->prev->next = mp->next;
8586 if (mp->next != NULL)
8587 mp->next->prev = mp->prev;
8589 minipool_vector_tail = mp->prev;
8591 /* Re-insert it before MAX_MP. */
8593 mp->prev = max_mp->prev;
8596 if (mp->prev != NULL)
8597 mp->prev->next = mp;
8599 minipool_vector_head = mp;
8602 /* Save the new entry. */
8605 /* Scan over the preceding entries and adjust their addresses as
8607 while (mp->prev != NULL
8608 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8610 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
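/* Illustration (standalone): stripped of the address bookkeeping, the
   move above is an ordinary doubly-linked-list splice -- unlink NODE,
   then reinsert it before POS, patching the head and tail pointers.
   NODE != POS is assumed, just as the gcc_assert above enforces.  */
struct toy_node
{
  struct toy_node *prev, *next;
};

static void
splice_before (struct toy_node *node, struct toy_node *pos,
               struct toy_node **head, struct toy_node **tail)
{
  /* Unlink NODE from its current position.  */
  if (node->prev)
    node->prev->next = node->next;
  else
    *head = node->next;
  if (node->next)
    node->next->prev = node->prev;
  else
    *tail = node->prev;

  /* Reinsert it before POS.  */
  node->next = pos;
  node->prev = pos->prev;
  pos->prev = node;
  if (node->prev)
    node->prev->next = node;
  else
    *head = node;
}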
8617 /* Add a constant to the minipool for a forward reference. Returns the
8618 node added or NULL if the constant will not fit in this pool. */
8620 add_minipool_forward_ref (Mfix *fix)
8622 /* If set, max_mp is the first pool_entry that has a lower
8623 constraint than the one we are trying to add. */
8624 Mnode * max_mp = NULL;
8625 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8628 /* If the minipool starts before the end of FIX->INSN then this FIX
8629    cannot be placed into the current pool.  Furthermore, adding the
8630 new constant pool entry may cause the pool to start FIX_SIZE bytes
8632 if (minipool_vector_head &&
8633 (fix->address + get_attr_length (fix->insn)
8634 >= minipool_vector_head->max_address - fix->fix_size))
8637 /* Scan the pool to see if a constant with the same value has
8638 already been added. While we are doing this, also note the
8639 location where we must insert the constant if it doesn't already
8641 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8643 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8644 && fix->mode == mp->mode
8645 && (GET_CODE (fix->value) != CODE_LABEL
8646 || (CODE_LABEL_NUMBER (fix->value)
8647 == CODE_LABEL_NUMBER (mp->value)))
8648 && rtx_equal_p (fix->value, mp->value))
8650 /* More than one fix references this entry. */
8652 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8655 /* Note the insertion point if necessary. */
8657 && mp->max_address > max_address)
8660 /* If we are inserting an 8-byte aligned quantity and
8661 we have not already found an insertion point, then
8662 make sure that all such 8-byte aligned quantities are
8663 placed at the start of the pool. */
8664 if (ARM_DOUBLEWORD_ALIGN
8666 && fix->fix_size >= 8
8667 && mp->fix_size < 8)
8670 max_address = mp->max_address;
8674 /* The value is not currently in the minipool, so we need to create
8675 a new entry for it. If MAX_MP is NULL, the entry will be put on
8676 the end of the list since the placement is less constrained than
8677 any existing entry. Otherwise, we insert the new fix before
8678 MAX_MP and, if necessary, adjust the constraints on the other
8681 mp->fix_size = fix->fix_size;
8682 mp->mode = fix->mode;
8683 mp->value = fix->value;
8685 /* Not yet required for a backwards ref. */
8686 mp->min_address = -65536;
8690 mp->max_address = max_address;
8692 mp->prev = minipool_vector_tail;
8694 if (mp->prev == NULL)
8696 minipool_vector_head = mp;
8697 minipool_vector_label = gen_label_rtx ();
8700 mp->prev->next = mp;
8702 minipool_vector_tail = mp;
8706 if (max_address > max_mp->max_address - mp->fix_size)
8707 mp->max_address = max_mp->max_address - mp->fix_size;
8709 mp->max_address = max_address;
8712 mp->prev = max_mp->prev;
8714 if (mp->prev != NULL)
8715 mp->prev->next = mp;
8717 minipool_vector_head = mp;
8720 /* Save the new entry. */
8723 /* Scan over the preceding entries and adjust their addresses as
8725 while (mp->prev != NULL
8726 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8728 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8736 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8737 HOST_WIDE_INT min_address)
8739 HOST_WIDE_INT offset;
8741 /* The code below assumes these are different. */
8742 gcc_assert (mp != min_mp);
8746 if (min_address > mp->min_address)
8747 mp->min_address = min_address;
8751 /* We will adjust this below if it is too loose. */
8752 mp->min_address = min_address;
8754 /* Unlink MP from its current position. Since min_mp is non-null,
8755 mp->next must be non-null. */
8756 mp->next->prev = mp->prev;
8757 if (mp->prev != NULL)
8758 mp->prev->next = mp->next;
8760 minipool_vector_head = mp->next;
8762 /* Reinsert it after MIN_MP. */
8764 mp->next = min_mp->next;
8766 if (mp->next != NULL)
8767 mp->next->prev = mp;
8769 minipool_vector_tail = mp;
8775 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8777 mp->offset = offset;
8778 if (mp->refcount > 0)
8779 offset += mp->fix_size;
8781 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8782 mp->next->min_address = mp->min_address + mp->fix_size;
8788 /* Add a constant to the minipool for a backward reference. Returns the
8789 node added or NULL if the constant will not fit in this pool.
8791 Note that the code for insertion for a backwards reference can be
8792 somewhat confusing because the calculated offsets for each fix do
8793 not take into account the size of the pool (which is still under
8796 add_minipool_backward_ref (Mfix *fix)
8798 /* If set, min_mp is the last pool_entry that has a lower constraint
8799 than the one we are trying to add. */
8800 Mnode *min_mp = NULL;
8801 /* This can be negative, since it is only a constraint. */
8802 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8805 /* If we can't reach the current pool from this insn, or if we can't
8806 insert this entry at the end of the pool without pushing other
8807 fixes out of range, then we don't try. This ensures that we
8808 can't fail later on. */
8809 if (min_address >= minipool_barrier->address
8810 || (minipool_vector_tail->min_address + fix->fix_size
8811 >= minipool_barrier->address))
8814 /* Scan the pool to see if a constant with the same value has
8815 already been added. While we are doing this, also note the
8816 location where we must insert the constant if it doesn't already
8818 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8820 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8821 && fix->mode == mp->mode
8822 && (GET_CODE (fix->value) != CODE_LABEL
8823 || (CODE_LABEL_NUMBER (fix->value)
8824 == CODE_LABEL_NUMBER (mp->value)))
8825 && rtx_equal_p (fix->value, mp->value)
8826 /* Check that there is enough slack to move this entry to the
8827 end of the table (this is conservative). */
8829 > (minipool_barrier->address
8830 + minipool_vector_tail->offset
8831 + minipool_vector_tail->fix_size)))
8834 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8838 mp->min_address += fix->fix_size;
8841 /* Note the insertion point if necessary. */
8842 if (mp->min_address < min_address)
8844 /* For now, we do not allow the insertion of 8-byte alignment
8845 requiring nodes anywhere but at the start of the pool. */
8846 if (ARM_DOUBLEWORD_ALIGN
8847 && fix->fix_size >= 8 && mp->fix_size < 8)
8852 else if (mp->max_address
8853 < minipool_barrier->address + mp->offset + fix->fix_size)
8855 /* Inserting before this entry would push the fix beyond
8856 its maximum address (which can happen if we have
8857 re-located a forwards fix); force the new fix to come
8859 if (ARM_DOUBLEWORD_ALIGN
8860 && fix->fix_size >= 8 && mp->fix_size < 8)
8865 min_address = mp->min_address + fix->fix_size;
8868 /* Do not insert a non-8-byte aligned quantity before 8-byte
8869 aligned quantities. */
8870 else if (ARM_DOUBLEWORD_ALIGN
8871 && fix->fix_size < 8
8872 && mp->fix_size >= 8)
8875 min_address = mp->min_address + fix->fix_size;
8880 /* We need to create a new entry. */
8882 mp->fix_size = fix->fix_size;
8883 mp->mode = fix->mode;
8884 mp->value = fix->value;
8886 mp->max_address = minipool_barrier->address + 65536;
8888 mp->min_address = min_address;
8893 mp->next = minipool_vector_head;
8895 if (mp->next == NULL)
8897 minipool_vector_tail = mp;
8898 minipool_vector_label = gen_label_rtx ();
8901 mp->next->prev = mp;
8903 minipool_vector_head = mp;
8907 mp->next = min_mp->next;
8911 if (mp->next != NULL)
8912 mp->next->prev = mp;
8914 minipool_vector_tail = mp;
8917 /* Save the new entry. */
8925 /* Scan over the following entries and adjust their offsets. */
8926 while (mp->next != NULL)
8928 if (mp->next->min_address < mp->min_address + mp->fix_size)
8929 mp->next->min_address = mp->min_address + mp->fix_size;
8932 mp->next->offset = mp->offset + mp->fix_size;
8934 mp->next->offset = mp->offset;
8943 assign_minipool_offsets (Mfix *barrier)
8945 HOST_WIDE_INT offset = 0;
8948 minipool_barrier = barrier;
8950 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8952 mp->offset = offset;
8954 if (mp->refcount > 0)
8955 offset += mp->fix_size;
8959 /* Output the literal table */
8961 dump_minipool (rtx scan)
8967 if (ARM_DOUBLEWORD_ALIGN)
8968 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8969 if (mp->refcount > 0 && mp->fix_size >= 8)
8977 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8978 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8980 scan = emit_label_after (gen_label_rtx (), scan);
8981 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8982 scan = emit_label_after (minipool_vector_label, scan);
8984 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8986 if (mp->refcount > 0)
8991 ";; Offset %u, min %ld, max %ld ",
8992 (unsigned) mp->offset, (unsigned long) mp->min_address,
8993 (unsigned long) mp->max_address);
8994 arm_print_value (dump_file, mp->value);
8995 fputc ('\n', dump_file);
8998 switch (mp->fix_size)
9000 #ifdef HAVE_consttable_1
9002 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
9006 #ifdef HAVE_consttable_2
9008 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
9012 #ifdef HAVE_consttable_4
9014 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
9018 #ifdef HAVE_consttable_8
9020 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
9024 #ifdef HAVE_consttable_16
9026 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
9039 minipool_vector_head = minipool_vector_tail = NULL;
9040 scan = emit_insn_after (gen_consttable_end (), scan);
9041 scan = emit_barrier_after (scan);
9044 /* Return the cost of forcibly inserting a barrier after INSN. */
9046 arm_barrier_cost (rtx insn)
9048 /* Basing the location of the pool on the loop depth is preferable,
9049 but at the moment, the basic block information seems to be
9050 corrupt by this stage of the compilation. */
9052 rtx next = next_nonnote_insn (insn);
9054 if (next != NULL && GET_CODE (next) == CODE_LABEL)
9057 switch (GET_CODE (insn))
9060 /* It will always be better to place the table before the label, rather than after it.  */
9069 return base_cost - 10;
9072 return base_cost + 10;
9076 /* Find the best place in the insn stream in the range
9077 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
9078 Create the barrier by inserting a jump and add a new fix entry for
9081 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
9083 HOST_WIDE_INT count = 0;
9085 rtx from = fix->insn;
9086 /* The instruction after which we will insert the jump. */
9087 rtx selected = NULL;
9089 /* The address at which the jump instruction will be placed. */
9090 HOST_WIDE_INT selected_address;
9092 HOST_WIDE_INT max_count = max_address - fix->address;
9093 rtx label = gen_label_rtx ();
9095 selected_cost = arm_barrier_cost (from);
9096 selected_address = fix->address;
9098 while (from && count < max_count)
9103 /* This code shouldn't have been called if there was a natural barrier within range. */
9105 gcc_assert (GET_CODE (from) != BARRIER);
9107 /* Count the length of this insn. */
9108 count += get_attr_length (from);
9110 /* If there is a jump table, add its length. */
9111 tmp = is_jump_table (from);
9114 count += get_jump_table_size (tmp);
9116 /* Jump tables aren't in a basic block, so base the cost on
9117 the dispatch insn. If we select this location, we will
9118 still put the pool after the table. */
9119 new_cost = arm_barrier_cost (from);
9121 if (count < max_count
9122 && (!selected || new_cost <= selected_cost))
9125 selected_cost = new_cost;
9126 selected_address = fix->address + count;
9129 /* Continue after the dispatch table. */
9130 from = NEXT_INSN (tmp);
9134 new_cost = arm_barrier_cost (from);
9136 if (count < max_count
9137 && (!selected || new_cost <= selected_cost))
9140 selected_cost = new_cost;
9141 selected_address = fix->address + count;
9144 from = NEXT_INSN (from);
9147 /* Make sure that we found a place to insert the jump. */
9148 gcc_assert (selected);
9150 /* Create a new JUMP_INSN that branches around a barrier. */
9151 from = emit_jump_insn_after (gen_jump (label), selected);
9152 JUMP_LABEL (from) = label;
9153 barrier = emit_barrier_after (from);
9154 emit_label_after (label, barrier);
9156 /* Create a minipool barrier entry for the new barrier. */
9157 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9158 new_fix->insn = barrier;
9159 new_fix->address = selected_address;
9160 new_fix->next = fix->next;
9161 fix->next = new_fix;
9166 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
9169 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9171 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9174 fix->address = address;
9177 if (minipool_fix_head != NULL)
9178 minipool_fix_tail->next = fix;
9180 minipool_fix_head = fix;
9182 minipool_fix_tail = fix;
9185 /* Record INSN, which will need fixing up to load a value from the
9186 minipool. ADDRESS is the offset of the insn since the start of the
9187 function; LOC is a pointer to the part of the insn which requires
9188 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
9191 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9192 enum machine_mode mode, rtx value)
9194 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9197 fix->address = address;
9200 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9202 fix->forwards = get_attr_pool_range (insn);
9203 fix->backwards = get_attr_neg_pool_range (insn);
9204 fix->minipool = NULL;
9206 /* If an insn doesn't have a range defined for it, then it isn't
9207 expecting to be reworked by this code. Better to stop now than
9208 to generate duff assembly code. */
9209 gcc_assert (fix->forwards || fix->backwards);
9211 /* If an entry requires 8-byte alignment then assume all constant pools
9212 require 4 bytes of padding. Trying to do this later on a per-pool
9213 basis is awkward because existing pool entries have to be modified. */
9214 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9220 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9221 GET_MODE_NAME (mode),
9222 INSN_UID (insn), (unsigned long) address,
9223 -1 * (long)fix->backwards, (long)fix->forwards);
9224 arm_print_value (dump_file, fix->value);
9225 fprintf (dump_file, "\n");
9228 /* Add it to the chain of fixes. */
9231 if (minipool_fix_head != NULL)
9232 minipool_fix_tail->next = fix;
9234 minipool_fix_head = fix;
9236 minipool_fix_tail = fix;
9239 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9240 Returns the number of insns needed, or 99 if we don't know how to
9243 arm_const_double_inline_cost (rtx val)
9245 rtx lowpart, highpart;
9246 enum machine_mode mode;
9248 mode = GET_MODE (val);
9250 if (mode == VOIDmode)
9253 gcc_assert (GET_MODE_SIZE (mode) == 8);
9255 lowpart = gen_lowpart (SImode, val);
9256 highpart = gen_highpart_mode (SImode, mode, val);
9258 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9259 gcc_assert (GET_CODE (highpart) == CONST_INT);
9261 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9262 NULL_RTX, NULL_RTX, 0, 0)
9263 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9264 NULL_RTX, NULL_RTX, 0, 0));
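/* As a worked example (illustrative values, not from the sources): for
   the 64-bit constant 0x000000ff000000ff each 32-bit half is 0x000000ff,
   a valid immediate, so the cost is 1 + 1 = 2 insns.  A half such as
   0x00012345 needs a chain of 8-bit chunks and therefore costs more.  */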
9267 /* Return true if it is worthwhile to split a 64-bit constant into two
9268 32-bit operations. This is the case if optimizing for size, or
9269 if we have load delay slots, or if one 32-bit part can be done with
9270 a single data operation. */
9272 arm_const_double_by_parts (rtx val)
9274 enum machine_mode mode = GET_MODE (val);
9277 if (optimize_size || arm_ld_sched)
9280 if (mode == VOIDmode)
9283 part = gen_highpart_mode (SImode, mode, val);
9285 gcc_assert (GET_CODE (part) == CONST_INT);
9287 if (const_ok_for_arm (INTVAL (part))
9288 || const_ok_for_arm (~INTVAL (part)))
9291 part = gen_lowpart (SImode, val);
9293 gcc_assert (GET_CODE (part) == CONST_INT);
9295 if (const_ok_for_arm (INTVAL (part))
9296 || const_ok_for_arm (~INTVAL (part)))
9302 /* Scan INSN and note any of its operands that need fixing.
9303 If DO_PUSHES is false we do not actually push any of the fixups
9304 needed. The function returns TRUE if any fixups were needed/pushed.
9305 This is used by arm_memory_load_p() which needs to know about loads
9306 of constants that will be converted into minipool loads. */
9308 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9310 bool result = false;
9313 extract_insn (insn);
9315 if (!constrain_operands (1))
9316 fatal_insn_not_found (insn);
9318 if (recog_data.n_alternatives == 0)
9321 /* Fill in recog_op_alt with information about the constraints of this insn. */
9323 preprocess_constraints ();
9325 for (opno = 0; opno < recog_data.n_operands; opno++)
9327 /* Things we need to fix can only occur in inputs. */
9328 if (recog_data.operand_type[opno] != OP_IN)
9331 /* If this alternative is a memory reference, then any mention
9332 of constants in this alternative is really to fool reload
9333 into allowing us to accept one there. We need to fix them up
9334 now so that we output the right code. */
9335 if (recog_op_alt[opno][which_alternative].memory_ok)
9337 rtx op = recog_data.operand[opno];
9339 if (CONSTANT_P (op))
9342 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9343 recog_data.operand_mode[opno], op);
9346 else if (GET_CODE (op) == MEM
9347 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9348 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9352 rtx cop = avoid_constant_pool_reference (op);
9354 /* Casting the address of something to a mode narrower
9355 than a word can cause avoid_constant_pool_reference()
9356 to return the pool reference itself. That's no good to
9357 us here. Let's just hope that we can use the
9358 constant pool value directly. */
9360 cop = get_pool_constant (XEXP (op, 0));
9362 push_minipool_fix (insn, address,
9363 recog_data.operand_loc[opno],
9364 recog_data.operand_mode[opno], cop);
9375 /* GCC puts the pool in the wrong place for ARM, since we can only
9376 load addresses a limited distance around the pc. We do some
9377 special munging to move the constant pool values to the correct
9378 point in the code. */
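/* For illustration, a fixed-up load and its minipool look roughly like
   this (label names are invented for the example):

	ldr	r0, .LCP0	@ pc-relative load, limited range
	...
	b	.LSKIP		@ branch around the pool when no natural
   .LCP0:			@ barrier exists (see create_fix_barrier)
	.word	0x12345678	@ minipool entry
   .LSKIP:
	...  */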
9383 HOST_WIDE_INT address = 0;
9386 minipool_fix_head = minipool_fix_tail = NULL;
9388 /* The first insn must always be a note, or the code below won't
9389 scan it properly. */
9390 insn = get_insns ();
9391 gcc_assert (GET_CODE (insn) == NOTE);
9394 /* Scan all the insns and record the operands that will need fixing. */
9395 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9397 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9398 && (arm_cirrus_insn_p (insn)
9399 || GET_CODE (insn) == JUMP_INSN
9400 || arm_memory_load_p (insn)))
9401 cirrus_reorg (insn);
9403 if (GET_CODE (insn) == BARRIER)
9404 push_minipool_barrier (insn, address);
9405 else if (INSN_P (insn))
9409 note_invalid_constants (insn, address, true);
9410 address += get_attr_length (insn);
9412 /* If the insn is a vector jump, add the size of the table
9413 and skip the table. */
9414 if ((table = is_jump_table (insn)) != NULL)
9416 address += get_jump_table_size (table);
9422 fix = minipool_fix_head;
9424 /* Now scan the fixups and perform the required changes. */
9429 Mfix * last_added_fix;
9430 Mfix * last_barrier = NULL;
9433 /* Skip any further barriers before the next fix. */
9434 while (fix && GET_CODE (fix->insn) == BARRIER)
9437 /* No more fixes. */
9441 last_added_fix = NULL;
9443 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9445 if (GET_CODE (ftmp->insn) == BARRIER)
9447 if (ftmp->address >= minipool_vector_head->max_address)
9450 last_barrier = ftmp;
9452 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9455 last_added_fix = ftmp; /* Keep track of the last fix added. */
9458 /* If we found a barrier, drop back to that; any fixes that we
9459 could have reached but come after the barrier will now go in
9460 the next mini-pool. */
9461 if (last_barrier != NULL)
9463 /* Reduce the refcount for those fixes that won't go into this pool after all. */
9465 for (fdel = last_barrier->next;
9466 fdel && fdel != ftmp;
9469 fdel->minipool->refcount--;
9470 fdel->minipool = NULL;
9473 ftmp = last_barrier;
9477 /* ftmp is the first fix that we can't fit into this pool and
9478 there are no natural barriers that we could use. Insert a
9479 new barrier in the code somewhere between the previous
9480 fix and this one, and arrange to jump around it. */
9481 HOST_WIDE_INT max_address;
9483 /* The last item on the list of fixes must be a barrier, so
9484 we can never run off the end of the list of fixes without
9485 last_barrier being set. */
9488 max_address = minipool_vector_head->max_address;
9489 /* Check that there isn't another fix that is in range that
9490 we couldn't fit into this pool because the pool was
9491 already too large: we need to put the pool before such an
9492 instruction. The pool itself may come just after the
9493 fix because create_fix_barrier also allows space for a
9494 jump instruction. */
9495 if (ftmp->address < max_address)
9496 max_address = ftmp->address + 1;
9498 last_barrier = create_fix_barrier (last_added_fix, max_address);
9501 assign_minipool_offsets (last_barrier);
9505 if (GET_CODE (ftmp->insn) != BARRIER
9506 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9513 /* Scan over the fixes we have identified for this pool, fixing them
9514 up and adding the constants to the pool itself. */
9515 for (this_fix = fix; this_fix && ftmp != this_fix;
9516 this_fix = this_fix->next)
9517 if (GET_CODE (this_fix->insn) != BARRIER)
9520 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9521 minipool_vector_label),
9522 this_fix->minipool->offset);
9523 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9526 dump_minipool (last_barrier->insn);
9530 /* From now on we must synthesize any constants that we can't handle
9531 directly. This can happen if the RTL gets split during final
9532 instruction generation. */
9533 after_arm_reorg = 1;
9535 /* Free the minipool memory. */
9536 obstack_free (&minipool_obstack, minipool_startobj);
9539 /* Routines to output assembly language. */
9541 /* If the rtx is the correct value then return the string of the number.
9542 In this way we can ensure that valid double constants are generated even
9543 when cross compiling. */
9545 fp_immediate_constant (rtx x)
9550 if (!fp_consts_inited)
9553 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9554 for (i = 0; i < 8; i++)
9555 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9556 return strings_fp[i];
9561 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9563 fp_const_from_val (REAL_VALUE_TYPE *r)
9567 if (!fp_consts_inited)
9570 for (i = 0; i < 8; i++)
9571 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9572 return strings_fp[i];
9577 /* Output the operands of a LDM/STM instruction to STREAM.
9578 MASK is the ARM register set mask of which only bits 0-15 are important.
9579 REG is the base register, either the frame pointer or the stack pointer,
9580 INSTR is the possibly suffixed load or store instruction.
9581 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9584 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9585 unsigned long mask, int rfe)
9588 bool not_first = FALSE;
9590 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9591 fputc ('\t', stream);
9592 asm_fprintf (stream, instr, reg);
9593 fputc ('{', stream);
9595 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9596 if (mask & (1 << i))
9599 fprintf (stream, ", ");
9601 asm_fprintf (stream, "%r", i);
9606 fprintf (stream, "}^\n");
9608 fprintf (stream, "}\n");
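/* For example, a hypothetical call
     print_multi_reg (stream, "ldmfd\t%r!, ", SP_REGNUM, 0x8030, 0);
   prints "ldmfd	sp!, {r4, r5, pc}"; with RFE nonzero the closing
   brace becomes "}^" so that SPSR is copied back into CPSR.  */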
9612 /* Output a FLDMD instruction to STREAM.
9613 BASE is the register containing the address.
9614 REG and COUNT specify the register range.
9615 Extra registers may be added to avoid hardware bugs.
9617 We output FLDMD even for ARMv5 VFP implementations. Although
9618 FLDMD is technically not supported until ARMv6, it is believed
9619 that all VFP implementations support its use in this context. */
9622 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9626 /* Workaround ARM10 VFPr1 bug. */
9627 if (count == 2 && !arm_arch6)
9634 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9635 load into multiple parts if we have to handle more than 16 registers. */
9638 vfp_output_fldmd (stream, base, reg, 16);
9639 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9643 fputc ('\t', stream);
9644 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9646 for (i = reg; i < reg + count; i++)
9649 fputs (", ", stream);
9650 asm_fprintf (stream, "d%d", i);
9652 fputs ("}\n", stream);
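/* For example, vfp_output_fldmd (stream, SP_REGNUM, 8, 3) emits
   "fldmfdd	sp!, {d8, d9, d10}"; with count == 2 on a pre-v6 core
   the ARM10 VFPr1 workaround pops one extra register.  */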
9657 /* Output the assembly for a store multiple. */
9660 vfp_output_fstmd (rtx * operands)
9667 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9668 p = strlen (pattern);
9670 gcc_assert (GET_CODE (operands[1]) == REG);
9672 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9673 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9675 p += sprintf (&pattern[p], ", d%d", base + i);
9677 strcpy (&pattern[p], "}");
9679 output_asm_insn (pattern, operands);
9684 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9685 number of bytes pushed. */
9688 vfp_emit_fstmd (int base_reg, int count)
9695 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9696 register pairs are stored by a store multiple insn. We avoid this
9697 by pushing an extra pair. */
9698 if (count == 2 && !arm_arch6)
9700 if (base_reg == LAST_VFP_REGNUM - 3)
9705 /* FSTMD may not store more than 16 doubleword registers at once. Split
9706 larger stores into multiple parts (up to a maximum of two, in practice). */
9711 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
9713 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9714 saved += vfp_emit_fstmd (base_reg, 16);
9718 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9719 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9721 reg = gen_rtx_REG (DFmode, base_reg);
9725 = gen_rtx_SET (VOIDmode,
9726 gen_frame_mem (BLKmode,
9727 gen_rtx_PRE_DEC (BLKmode,
9728 stack_pointer_rtx)),
9729 gen_rtx_UNSPEC (BLKmode,
9733 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9734 plus_constant (stack_pointer_rtx, -(count * 8)));
9735 RTX_FRAME_RELATED_P (tmp) = 1;
9736 XVECEXP (dwarf, 0, 0) = tmp;
9738 tmp = gen_rtx_SET (VOIDmode,
9739 gen_frame_mem (DFmode, stack_pointer_rtx),
9741 RTX_FRAME_RELATED_P (tmp) = 1;
9742 XVECEXP (dwarf, 0, 1) = tmp;
9744 for (i = 1; i < count; i++)
9746 reg = gen_rtx_REG (DFmode, base_reg);
9748 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9750 tmp = gen_rtx_SET (VOIDmode,
9751 gen_frame_mem (DFmode,
9752 plus_constant (stack_pointer_rtx,
9755 RTX_FRAME_RELATED_P (tmp) = 1;
9756 XVECEXP (dwarf, 0, i + 1) = tmp;
9759 par = emit_insn (par);
9760 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9762 RTX_FRAME_RELATED_P (par) = 1;
9767 /* Emit a call instruction with pattern PAT. ADDR is the address of the call target. */
9771 arm_emit_call_insn (rtx pat, rtx addr)
9775 insn = emit_call_insn (pat);
9777 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9778 If the call might use such an entry, add a use of the PIC register
9779 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9780 if (TARGET_VXWORKS_RTP
9782 && GET_CODE (addr) == SYMBOL_REF
9783 && (SYMBOL_REF_DECL (addr)
9784 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9785 : !SYMBOL_REF_LOCAL_P (addr)))
9787 require_pic_register ();
9788 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9792 /* Output a 'call' insn. */
9794 output_call (rtx *operands)
9796 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9798 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9799 if (REGNO (operands[0]) == LR_REGNUM)
9801 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9802 output_asm_insn ("mov%?\t%0, %|lr", operands);
9805 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9807 if (TARGET_INTERWORK || arm_arch4t)
9808 output_asm_insn ("bx%?\t%0", operands);
9810 output_asm_insn ("mov%?\t%|pc, %0", operands);
9815 /* Output a 'call' insn that is a reference in memory. */
9817 output_call_mem (rtx *operands)
9819 if (TARGET_INTERWORK && !arm_arch5)
9821 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9822 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9823 output_asm_insn ("bx%?\t%|ip", operands);
9825 else if (regno_use_in (LR_REGNUM, operands[0]))
9827 /* LR is used in the memory address. We load the address in the
9828 first instruction. It's safe to use IP as the target of the
9829 load since the call will kill it anyway. */
9830 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9832 output_asm_insn ("blx%?\t%|ip", operands);
9835 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9837 output_asm_insn ("bx%?\t%|ip", operands);
9839 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9844 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9845 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9852 /* Output a move from arm registers to an fpa register.
9853 OPERANDS[0] is an fpa register.
9854 OPERANDS[1] is the first register of an arm register pair. */
9856 output_mov_long_double_fpa_from_arm (rtx *operands)
9858 int arm_reg0 = REGNO (operands[1]);
9861 gcc_assert (arm_reg0 != IP_REGNUM);
9863 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9864 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9865 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9867 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9868 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9873 /* Output a move from an fpa register to arm registers.
9874 OPERANDS[0] is the first register of an arm register pair.
9875 OPERANDS[1] is an fpa register. */
9877 output_mov_long_double_arm_from_fpa (rtx *operands)
9879 int arm_reg0 = REGNO (operands[0]);
9882 gcc_assert (arm_reg0 != IP_REGNUM);
9884 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9885 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9886 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9888 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9889 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9893 /* Output a move from arm registers to arm registers of a long double.
9894 OPERANDS[0] is the destination.
9895 OPERANDS[1] is the source. */
9897 output_mov_long_double_arm_from_arm (rtx *operands)
9899 /* We have to be careful here because the two might overlap. */
9900 int dest_start = REGNO (operands[0]);
9901 int src_start = REGNO (operands[1]);
9905 if (dest_start < src_start)
9907 for (i = 0; i < 3; i++)
9909 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9910 ops[1] = gen_rtx_REG (SImode, src_start + i);
9911 output_asm_insn ("mov%?\t%0, %1", ops);
9916 for (i = 2; i >= 0; i--)
9918 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9919 ops[1] = gen_rtx_REG (SImode, src_start + i);
9920 output_asm_insn ("mov%?\t%0, %1", ops);
9928 /* Emit a MOVW/MOVT pair. */
9929 void arm_emit_movpair (rtx dest, rtx src)
9931 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
9932 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
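/* For example, loading the constant 0x12345678 expands to

	movw	rD, #0x5678	@ the HIGH insn sets the low halfword
	movt	rD, #0x1234	@ the LO_SUM insn fills in the high halfword

   (rD stands for the destination register; in the ARM port HIGH and
   LO_SUM are deliberately "backwards" in this sense).  */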
9936 /* Output a move from arm registers to an fpa register.
9937 OPERANDS[0] is an fpa register.
9938 OPERANDS[1] is the first register of an arm register pair. */
9940 output_mov_double_fpa_from_arm (rtx *operands)
9942 int arm_reg0 = REGNO (operands[1]);
9945 gcc_assert (arm_reg0 != IP_REGNUM);
9947 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9948 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9949 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9950 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9954 /* Output a move from an fpa register to arm registers.
9955 OPERANDS[0] is the first register of an arm register pair.
9956 OPERANDS[1] is an fpa register. */
9958 output_mov_double_arm_from_fpa (rtx *operands)
9960 int arm_reg0 = REGNO (operands[0]);
9963 gcc_assert (arm_reg0 != IP_REGNUM);
9965 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9966 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9967 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9968 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
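/* E.g. moving f1 into r0/r1 is emitted as

	stfd	f1, [sp, #-8]!
	ldmfd	sp!, {r0, r1}

   bouncing the value through the stack rather than moving it directly.  */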
9972 /* Output a move between double words.
9973 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9974 or MEM<-REG and all MEMs must be offsettable addresses. */
9976 output_move_double (rtx *operands)
9978 enum rtx_code code0 = GET_CODE (operands[0]);
9979 enum rtx_code code1 = GET_CODE (operands[1]);
9984 int reg0 = REGNO (operands[0]);
9986 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9988 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9990 switch (GET_CODE (XEXP (operands[1], 0)))
9994 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
9995 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
9997 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10001 gcc_assert (TARGET_LDRD);
10002 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
10007 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
10009 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
10014 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
10016 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
10020 gcc_assert (TARGET_LDRD);
10021 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
10026 /* Autoincrement addressing modes should never have overlapping
10027 base and destination registers, and overlapping index registers
10028 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
10030 otherops[0] = operands[0];
10031 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
10032 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
10034 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
10036 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10038 /* Registers overlap so split out the increment. */
10039 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10040 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
10044 /* IWMMXT allows offsets larger than ldrd can handle,
10045 fix these up with a pair of ldr. */
10046 if (GET_CODE (otherops[2]) == CONST_INT
10047 && (INTVAL(otherops[2]) <= -256
10048 || INTVAL(otherops[2]) >= 256))
10050 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10051 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10052 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10055 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
10060 /* IWMMXT allows offsets larger than ldrd can handle,
10061 fix these up with a pair of ldr. */
10062 if (GET_CODE (otherops[2]) == CONST_INT
10063 && (INTVAL(otherops[2]) <= -256
10064 || INTVAL(otherops[2]) >= 256))
10066 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10067 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10068 otherops[0] = operands[0];
10069 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10072 /* We only allow constant increments, so this is safe. */
10073 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
10079 /* We might be able to use ldrd %0, %1 here. However the range is
10080 different to ldr/adr, and it is broken on some ARMv7-M
10081 implementations. */
10082 /* Use the second register of the pair to avoid problematic base/destination register overlap (cf. fix_cm3_ldrd). */
10084 otherops[1] = operands[1];
10085 output_asm_insn ("adr%?\t%0, %1", otherops);
10086 operands[1] = otherops[0];
10088 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10090 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
10093 /* ??? This needs checking for thumb2. */
10095 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
10096 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
10098 otherops[0] = operands[0];
10099 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
10100 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
10102 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10104 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10106 switch ((int) INTVAL (otherops[2]))
10109 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10114 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10119 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10123 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
10124 operands[1] = otherops[0];
10126 && (GET_CODE (otherops[2]) == REG
10127 || (GET_CODE (otherops[2]) == CONST_INT
10128 && INTVAL (otherops[2]) > -256
10129 && INTVAL (otherops[2]) < 256)))
10131 if (reg_overlap_mentioned_p (operands[0],
10135 /* Swap base and index registers over to
10136 avoid a conflict. */
10138 otherops[1] = otherops[2];
10141 /* If both registers conflict, it will usually
10142 have been fixed by a splitter. */
10143 if (reg_overlap_mentioned_p (operands[0], otherops[2])
10144 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
10146 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10147 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10151 otherops[0] = operands[0];
10152 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10157 if (GET_CODE (otherops[2]) == CONST_INT)
10159 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10160 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10162 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10165 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10168 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10171 return "ldr%(d%)\t%0, [%1]";
10173 return "ldm%(ia%)\t%1, %M0";
10177 otherops[1] = adjust_address (operands[1], SImode, 4);
10178 /* Take care of overlapping base/data reg. */
10179 if (reg_mentioned_p (operands[0], operands[1]))
10181 output_asm_insn ("ldr%?\t%0, %1", otherops);
10182 output_asm_insn ("ldr%?\t%0, %1", operands);
10186 output_asm_insn ("ldr%?\t%0, %1", operands);
10187 output_asm_insn ("ldr%?\t%0, %1", otherops);
10194 /* Constraints should ensure this. */
10195 gcc_assert (code0 == MEM && code1 == REG);
10196 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10198 switch (GET_CODE (XEXP (operands[0], 0)))
10202 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10204 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10208 gcc_assert (TARGET_LDRD);
10209 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10214 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10216 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10221 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10223 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10227 gcc_assert (TARGET_LDRD);
10228 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10233 otherops[0] = operands[1];
10234 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10235 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10237 /* IWMMXT allows offsets larger than strd can handle,
10238 fix these up with a pair of str. */
10239 if (GET_CODE (otherops[2]) == CONST_INT
10240 && (INTVAL(otherops[2]) <= -256
10241 || INTVAL(otherops[2]) >= 256))
10244 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10245 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10247 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
10248 otherops[0] = reg1;
10249 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10253 otherops[0] = reg1;
10254 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10255 otherops[0] = operands[1];
10256 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
10259 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10260 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10262 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10266 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10267 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10269 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10272 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10278 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10284 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10289 && (GET_CODE (otherops[2]) == REG
10290 || (GET_CODE (otherops[2]) == CONST_INT
10291 && INTVAL (otherops[2]) > -256
10292 && INTVAL (otherops[2]) < 256)))
10294 otherops[0] = operands[1];
10295 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10296 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10302 otherops[0] = adjust_address (operands[0], SImode, 4);
10303 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10304 output_asm_insn ("str%?\t%1, %0", operands);
10305 output_asm_insn ("str%?\t%1, %0", otherops);
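/* For example, a simple 64-bit load of [r2] into r0/r1 comes out as
   "ldrd	r0, [r2]" when TARGET_LDRD and as "ldmia	r2, {r0, r1}"
   otherwise; the matching store uses strd/stmia in the same way.  */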
10312 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10313 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10316 output_move_quad (rtx *operands)
10318 if (REG_P (operands[0]))
10320 /* Load, or reg->reg move. */
10322 if (MEM_P (operands[1]))
10324 switch (GET_CODE (XEXP (operands[1], 0)))
10327 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10332 output_asm_insn ("adr%?\t%0, %1", operands);
10333 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10337 gcc_unreachable ();
10345 gcc_assert (REG_P (operands[1]));
10347 dest = REGNO (operands[0]);
10348 src = REGNO (operands[1]);
10350 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
10353 for (i = 0; i < 4; i++)
10355 ops[0] = gen_rtx_REG (SImode, dest + i);
10356 ops[1] = gen_rtx_REG (SImode, src + i);
10357 output_asm_insn ("mov%?\t%0, %1", ops);
10360 for (i = 3; i >= 0; i--)
10362 ops[0] = gen_rtx_REG (SImode, dest + i);
10363 ops[1] = gen_rtx_REG (SImode, src + i);
10364 output_asm_insn ("mov%?\t%0, %1", ops);
10370 gcc_assert (MEM_P (operands[0]));
10371 gcc_assert (REG_P (operands[1]));
10372 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10374 switch (GET_CODE (XEXP (operands[0], 0)))
10377 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10381 gcc_unreachable ();
10388 /* Output a VFP load or store instruction. */
10391 output_move_vfp (rtx *operands)
10393 rtx reg, mem, addr, ops[2];
10394 int load = REG_P (operands[0]);
10395 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10396 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10399 enum machine_mode mode;
10401 reg = operands[!load];
10402 mem = operands[load];
10404 mode = GET_MODE (reg);
10406 gcc_assert (REG_P (reg));
10407 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10408 gcc_assert (mode == SFmode
10412 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10413 gcc_assert (MEM_P (mem));
10415 addr = XEXP (mem, 0);
10417 switch (GET_CODE (addr))
10420 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10421 ops[0] = XEXP (addr, 0);
10426 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10427 ops[0] = XEXP (addr, 0);
10432 templ = "f%s%c%%?\t%%%s0, %%1%s";
10438 sprintf (buff, templ,
10439 load ? "ld" : "st",
10442 integer_p ? "\t%@ int" : "");
10443 output_asm_insn (buff, ops);
10448 /* Output a Neon quad-word load or store, or a load or store for
10449 larger structure modes.
10451 WARNING: The ordering of elements is weird in big-endian mode,
10452 because we use VSTM, as required by the EABI. GCC RTL defines
10453 element ordering based on in-memory order. This can differ
10454 from the architectural ordering of elements within a NEON register.
10455 The intrinsics defined in arm_neon.h use the NEON register element
10456 ordering, not the GCC RTL element ordering.
10458 For example, the in-memory ordering of a big-endian quadword
10459 vector with 16-bit elements when stored from register pair {d0,d1}
10460 will be (lowest address first, d0[N] is NEON register element N):
10462 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
10464 When necessary, quadword registers (dN, dN+1) are moved to ARM
10465 registers from rN in the order:
10467 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10469 So that STM/LDM can be used on vectors in ARM registers, and the
10470 same memory layout will result as if VSTM/VLDM were used. */
10473 output_move_neon (rtx *operands)
10475 rtx reg, mem, addr, ops[2];
10476 int regno, load = REG_P (operands[0]);
10479 enum machine_mode mode;
10481 reg = operands[!load];
10482 mem = operands[load];
10484 mode = GET_MODE (reg);
10486 gcc_assert (REG_P (reg));
10487 regno = REGNO (reg);
10488 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10489 || NEON_REGNO_OK_FOR_QUAD (regno));
10490 gcc_assert (VALID_NEON_DREG_MODE (mode)
10491 || VALID_NEON_QREG_MODE (mode)
10492 || VALID_NEON_STRUCT_MODE (mode));
10493 gcc_assert (MEM_P (mem));
10495 addr = XEXP (mem, 0);
10497 /* Strip off const from addresses like (const (plus (...))). */
10498 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10499 addr = XEXP (addr, 0);
10501 switch (GET_CODE (addr))
10504 templ = "v%smia%%?\t%%0!, %%h1";
10505 ops[0] = XEXP (addr, 0);
10510 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10511 gcc_unreachable ();
10516 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10519 for (i = 0; i < nregs; i++)
10521 /* We're only using DImode here because it's a convenient size. */
10522 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10523 ops[1] = adjust_address (mem, SImode, 8 * i);
10524 if (reg_overlap_mentioned_p (ops[0], mem))
10526 gcc_assert (overlap == -1);
10531 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10532 output_asm_insn (buff, ops);
10537 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10538 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10539 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10540 output_asm_insn (buff, ops);
10547 templ = "v%smia%%?\t%%m0, %%h1";
10552 sprintf (buff, templ, load ? "ld" : "st");
10553 output_asm_insn (buff, ops);
10558 /* Output an ADD r, s, #n where n may be too big for one instruction.
10559 If adding zero to one register, output nothing. */
10561 output_add_immediate (rtx *operands)
10563 HOST_WIDE_INT n = INTVAL (operands[2]);
10565 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10568 output_multi_immediate (operands,
10569 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10572 output_multi_immediate (operands,
10573 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10580 /* Output a multiple immediate operation.
10581 OPERANDS is the vector of operands referred to in the output patterns.
10582 INSTR1 is the output pattern to use for the first constant.
10583 INSTR2 is the output pattern to use for subsequent constants.
10584 IMMED_OP is the index of the constant slot in OPERANDS.
10585 N is the constant value. */
10586 static const char *
10587 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10588 int immed_op, HOST_WIDE_INT n)
10590 #if HOST_BITS_PER_WIDE_INT > 32
10596 /* Quick and easy output. */
10597 operands[immed_op] = const0_rtx;
10598 output_asm_insn (instr1, operands);
10603 const char * instr = instr1;
10605 /* Note that n is never zero here (which would give no output). */
10606 for (i = 0; i < 32; i += 2)
10610 operands[immed_op] = GEN_INT (n & (255 << i));
10611 output_asm_insn (instr, operands);
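/* For example, adding #0x10004 (not a valid 8-bit rotated immediate)
   to r1 is split into two instructions:

	add	r0, r1, #4	@ first 8-bit chunk, uses INSTR1
	add	r0, r0, #65536	@ remaining chunk, uses INSTR2

   each operand being an 8-bit field at an even rotation.  */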
10621 /* Return the name of a shifter operation. */
10622 static const char *
10623 arm_shift_nmem(enum rtx_code code)
10628 return ARM_LSL_NAME;
10644 /* Return the appropriate ARM instruction for the operation code.
10645 The returned result should not be overwritten. OP is the rtx of the
10646 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator was shifted. */
10649 arithmetic_instr (rtx op, int shift_first_arg)
10651 switch (GET_CODE (op))
10657 return shift_first_arg ? "rsb" : "sub";
10672 return arm_shift_nmem(GET_CODE(op));
10675 gcc_unreachable ();
10679 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10680 for the operation code. The returned result should not be overwritten.
10681 OP is the rtx code of the shift.
10682 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant shift amount otherwise. */
10684 static const char *
10685 shift_op (rtx op, HOST_WIDE_INT *amountp)
10688 enum rtx_code code = GET_CODE (op);
10690 switch (GET_CODE (XEXP (op, 1)))
10698 *amountp = INTVAL (XEXP (op, 1));
10702 gcc_unreachable ();
10708 gcc_assert (*amountp != -1);
10709 *amountp = 32 - *amountp;
10712 /* Fall through. */
10718 mnem = arm_shift_nmem(code);
10722 /* We never have to worry about the amount being other than a
10723 power of 2, since this case can never be reloaded from a reg. */
10724 gcc_assert (*amountp != -1);
10725 *amountp = int_log2 (*amountp);
10726 return ARM_LSL_NAME;
10729 gcc_unreachable ();
10732 if (*amountp != -1)
10734 /* This is not 100% correct, but follows from the desire to merge
10735 multiplication by a power of 2 with the recognizer for a
10736 shift. >=32 is not a valid shift for "lsl", so we must try and
10737 output a shift that produces the correct arithmetical result.
10738 Using lsr #32 is identical except for the fact that the carry bit
10739 is not set correctly if we set the flags; but we never use the
10740 carry bit from such an operation, so we can ignore that. */
10741 if (code == ROTATERT)
10742 /* Rotate is just modulo 32. */
10744 else if (*amountp != (*amountp & 31))
10746 if (code == ASHIFT)
10751 /* Shifts of 0 are no-ops. */
10759 /* Obtain the shift from the POWER of two. */
10761 static HOST_WIDE_INT
10762 int_log2 (HOST_WIDE_INT power)
10764 HOST_WIDE_INT shift = 0;
10766 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10768 gcc_assert (shift <= 31);
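/* So, for example, int_log2 (8) yields 3, and a (mult x 8) shifter
   operand is output as "lsl" with an amount of 3; a rotate amount is
   simply reduced modulo 32.  */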
10775 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10776 because /bin/as is horribly restrictive. The judgement about
10777 whether or not each character is 'printable' (and can be output as
10778 is) or not (and must be printed with an octal escape) must be made
10779 with reference to the *host* character set -- the situation is
10780 similar to that discussed in the comments above pp_c_char in
10781 c-pretty-print.c. */
10783 #define MAX_ASCII_LEN 51
10786 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10789 int len_so_far = 0;
10791 fputs ("\t.ascii\t\"", stream);
10793 for (i = 0; i < len; i++)
10797 if (len_so_far >= MAX_ASCII_LEN)
10799 fputs ("\"\n\t.ascii\t\"", stream);
10805 if (c == '\\' || c == '\"')
10807 putc ('\\', stream);
10815 fprintf (stream, "\\%03o", c);
10820 fputs ("\"\n", stream);
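/* For example, the four bytes 'h', 'i', '"', '\n' are emitted as

	.ascii	"hi\"\012"

   with the quote backslash-escaped and the newline printed in octal.  */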
10823 /* Compute the register save mask for registers 0 through 12
10824 inclusive. This code is used by arm_compute_save_reg_mask. */
10826 static unsigned long
10827 arm_compute_save_reg0_reg12_mask (void)
10829 unsigned long func_type = arm_current_func_type ();
10830 unsigned long save_reg_mask = 0;
10833 if (IS_INTERRUPT (func_type))
10835 unsigned int max_reg;
10836 /* Interrupt functions must not corrupt any registers,
10837 even call clobbered ones. If this is a leaf function
10838 we can just examine the registers used by the RTL, but
10839 otherwise we have to assume that whatever function is
10840 called might clobber anything, and so we have to save
10841 all the call-clobbered registers as well. */
10842 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10843 /* FIQ handlers have registers r8 - r12 banked, so
10844 we only need to check r0 - r7. Normal ISRs only
10845 bank r14 and r15, so we must check up to r12.
10846 r13 is the stack pointer which is always preserved,
10847 so we do not need to consider it here. */
10852 for (reg = 0; reg <= max_reg; reg++)
10853 if (df_regs_ever_live_p (reg)
10854 || (! current_function_is_leaf && call_used_regs[reg]))
10855 save_reg_mask |= (1 << reg);
10857 /* Also save the pic base register if necessary. */
10859 && !TARGET_SINGLE_PIC_BASE
10860 && arm_pic_register != INVALID_REGNUM
10861 && crtl->uses_pic_offset_table)
10862 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10866 /* In the normal case we only need to save those registers
10867 which are call saved and which are used by this function. */
10868 for (reg = 0; reg <= 11; reg++)
10869 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10870 save_reg_mask |= (1 << reg);
10872 /* Handle the frame pointer as a special case. */
10873 if (frame_pointer_needed)
10874 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10876 /* If we aren't loading the PIC register,
10877 don't stack it even though it may be live. */
10879 && !TARGET_SINGLE_PIC_BASE
10880 && arm_pic_register != INVALID_REGNUM
10881 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10882 || crtl->uses_pic_offset_table))
10883 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10885 /* The prologue will copy SP into R0, so save it. */
10886 if (IS_STACKALIGN (func_type))
10887 save_reg_mask |= 1;
10890 /* Save registers so the exception handler can modify them. */
10891 if (crtl->calls_eh_return)
10897 reg = EH_RETURN_DATA_REGNO (i);
10898 if (reg == INVALID_REGNUM)
10900 save_reg_mask |= 1 << reg;
10904 return save_reg_mask;
10908 /* Compute the number of bytes used to store the static chain register on the
10909 stack, above the stack frame. We need to know this accurately to get the
10910 alignment of the rest of the stack frame correct. */
10912 static int arm_compute_static_chain_stack_bytes (void)
10914 unsigned long func_type = arm_current_func_type ();
10915 int static_chain_stack_bytes = 0;
10917 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
10918 && IS_NESTED (func_type)
10919 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
10920 static_chain_stack_bytes = 4;
10922 return static_chain_stack_bytes;
10926 /* Compute a bit mask of which registers need to be
10927 saved on the stack for the current function.
10928 This is used by arm_get_frame_offsets, which may add extra registers. */
10930 static unsigned long
10931 arm_compute_save_reg_mask (void)
10933 unsigned int save_reg_mask = 0;
10934 unsigned long func_type = arm_current_func_type ();
10937 if (IS_NAKED (func_type))
10938 /* This should never really happen. */
10941 /* If we are creating a stack frame, then we must save the frame pointer,
10942 IP (which will hold the old stack pointer), LR and the PC. */
10943 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
10945 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10948 | (1 << PC_REGNUM);
10950 /* Volatile functions do not return, so there
10951 is no need to save any other registers. */
10952 if (IS_VOLATILE (func_type))
10953 return save_reg_mask;
10955 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10957 /* Decide if we need to save the link register.
10958 Interrupt routines have their own banked link register,
10959 so they never need to save it.
10960 Otherwise if we do not use the link register we do not need to save
10961 it. If we are pushing other registers onto the stack however, we
10962 can save an instruction in the epilogue by pushing the link register
10963 now and then popping it back into the PC. This incurs extra memory
10964 accesses though, so we only do it when optimizing for size, and only
10965 if we know that we will not need a fancy return sequence. */
10966 if (df_regs_ever_live_p (LR_REGNUM)
10969 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10970 && !crtl->calls_eh_return))
10971 save_reg_mask |= 1 << LR_REGNUM;
10973 if (cfun->machine->lr_save_eliminated)
10974 save_reg_mask &= ~ (1 << LR_REGNUM);
10976 if (TARGET_REALLY_IWMMXT
10977 && ((bit_count (save_reg_mask)
10978 + ARM_NUM_INTS (crtl->args.pretend_args_size +
10979 arm_compute_static_chain_stack_bytes())
10982 /* The total number of registers that are going to be pushed
10983 onto the stack is odd. We need to ensure that the stack
10984 is 64-bit aligned before we start to save iWMMXt registers,
10985 and also before we start to create locals. (A local variable
10986 might be a double or long long which we will load/store using
10987 an iWMMXt instruction). Therefore we need to push another
10988 ARM register, so that the stack will be 64-bit aligned. We
10989 try to avoid using the arg registers (r0 - r3) as they might be
10990 used to pass values in a tail call. */
10991 for (reg = 4; reg <= 12; reg++)
10992 if ((save_reg_mask & (1 << reg)) == 0)
10996 save_reg_mask |= (1 << reg);
10999 cfun->machine->sibcall_blocked = 1;
11000 save_reg_mask |= (1 << 3);
11004 /* We may need to push an additional register for use initializing the
11005 PIC base register. */
11006 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
11007 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
11009 reg = thumb_find_work_register (1 << 4);
11010 if (!call_used_regs[reg])
11011 save_reg_mask |= (1 << reg);
11014 return save_reg_mask;
11018 /* Compute a bit mask of which registers need to be
11019 saved on the stack for the current function. */
11020 static unsigned long
11021 thumb1_compute_save_reg_mask (void)
11023 unsigned long mask;
11027 for (reg = 0; reg < 12; reg ++)
11028 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11032 && !TARGET_SINGLE_PIC_BASE
11033 && arm_pic_register != INVALID_REGNUM
11034 && crtl->uses_pic_offset_table)
11035 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11037 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
11038 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
11039 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
11041 /* LR will also be pushed if any lo regs are pushed. */
11042 if (mask & 0xff || thumb_force_lr_save ())
11043 mask |= (1 << LR_REGNUM);
11045 /* Make sure we have a low work register if we need one.
11046 We will need one if we are going to push a high register,
11047 but we are not currently intending to push a low register. */
11048 if ((mask & 0xff) == 0
11049 && ((mask & 0x0f00) || TARGET_BACKTRACE))
11051 /* Use thumb_find_work_register to choose which register
11052 we will use. If the register is live then we will
11053 have to push it. Use LAST_LO_REGNUM as our fallback
11054 choice for the register to select. */
11055 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
11056 /* Make sure the register returned by thumb_find_work_register is
11057 not part of the return value. */
11058 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
11059 reg = LAST_LO_REGNUM;
11061 if (! call_used_regs[reg])
11065 /* The 504 below is 8 bytes less than 512 because there are two possible
11066 alignment words. We can't tell here if they will be present or not so we
11067 have to play it safe and assume that they are. */
11068 if ((CALLER_INTERWORKING_SLOT_SIZE +
11069 ROUND_UP_WORD (get_frame_size ()) +
11070 crtl->outgoing_args_size) >= 504)
11072 /* This is the same as the code in thumb1_expand_prologue() which
11073 determines which register to use for stack decrement. */
11074 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
11075 if (mask & (1 << reg))
11078 if (reg > LAST_LO_REGNUM)
11080 /* Make sure we have a register available for stack decrement. */
11081 mask |= 1 << LAST_LO_REGNUM;
11089 /* Return the number of bytes required to save VFP registers. */
11091 arm_get_vfp_saved_size (void)
11093 unsigned int regno;
11098 /* Space for saved VFP registers. */
11099 if (TARGET_HARD_FLOAT && TARGET_VFP)
11102 for (regno = FIRST_VFP_REGNUM;
11103 regno < LAST_VFP_REGNUM;
11106 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
11107 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
11111 /* Workaround ARM10 VFPr1 bug. */
11112 if (count == 2 && !arm_arch6)
11114 saved += count * 8;
11123 if (count == 2 && !arm_arch6)
11125 saved += count * 8;
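/* For example, a function that clobbers only d8 and d9 needs
   2 * 8 = 16 bytes here, except on a pre-v6 core where the ARM10
   VFPr1 workaround stores one extra D register, making it 24.  */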
11132 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
11133 everything bar the final return instruction. */
11135 output_return_instruction (rtx operand, int really_return, int reverse)
11137 char conditional[10];
11140 unsigned long live_regs_mask;
11141 unsigned long func_type;
11142 arm_stack_offsets *offsets;
11144 func_type = arm_current_func_type ();
11146 if (IS_NAKED (func_type))
11149 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11151 /* If this function was declared non-returning, and we have
11152 found a tail call, then we have to trust that the called
11153 function won't return. */
11158 /* Otherwise, trap an attempted return by aborting. */
11160 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11162 assemble_external_libcall (ops[1]);
11163 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11169 gcc_assert (!cfun->calls_alloca || really_return);
11171 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11173 return_used_this_function = 1;
11175 offsets = arm_get_frame_offsets ();
11176 live_regs_mask = offsets->saved_regs_mask;
11178 if (live_regs_mask)
11180 const char * return_reg;
11182 /* If we do not have any special requirements for function exit
11183 (e.g. interworking) then we can load the return address
11184 directly into the PC. Otherwise we must load it into LR. */
11186 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11187 return_reg = reg_names[PC_REGNUM];
11189 return_reg = reg_names[LR_REGNUM];
11191 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11193 /* There are three possible reasons for the IP register
11194 being saved. 1) a stack frame was created, in which case
11195 IP contains the old stack pointer, or 2) an ISR routine
11196 corrupted it, or 3) it was saved to align the stack on
11197 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP. */
11199 if (frame_pointer_needed)
11201 live_regs_mask &= ~ (1 << IP_REGNUM);
11202 live_regs_mask |= (1 << SP_REGNUM);
11205 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11208 /* On some ARM architectures it is faster to use LDR rather than
11209 LDM to load a single register. On other architectures, the
11210 cost is the same. In 26 bit mode, or for exception handlers,
11211 we have to use LDM to load the PC so that the CPSR is also restored. */
11213 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11214 if (live_regs_mask == (1U << reg))
11217 if (reg <= LAST_ARM_REGNUM
11218 && (reg != LR_REGNUM
11220 || ! IS_INTERRUPT (func_type)))
11222 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11223 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11230 /* Generate the load multiple instruction to restore the
11231 registers. Note we can get here, even if
11232 frame_pointer_needed is true, but only if sp already
11233 points to the base of the saved core registers. */
11234 if (live_regs_mask & (1 << SP_REGNUM))
11236 unsigned HOST_WIDE_INT stack_adjust;
11238 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11239 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11241 if (stack_adjust && arm_arch5 && TARGET_ARM)
11242 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11245 /* If we can't use ldmib (SA110 bug),
11246 then try to pop r3 instead. */
11248 live_regs_mask |= 1 << 3;
11249 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11253 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11255 p = instr + strlen (instr);
11257 for (reg = 0; reg <= SP_REGNUM; reg++)
11258 if (live_regs_mask & (1 << reg))
11260 int l = strlen (reg_names[reg]);
11266 memcpy (p, ", ", 2);
11270 memcpy (p, "%|", 2);
11271 memcpy (p + 2, reg_names[reg], l);
11275 if (live_regs_mask & (1 << LR_REGNUM))
11277 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11278 /* If returning from an interrupt, restore the CPSR. */
11279 if (IS_INTERRUPT (func_type))
11286 output_asm_insn (instr, & operand);
11288 /* See if we need to generate an extra instruction to
11289 perform the actual function return. */
11291 && func_type != ARM_FT_INTERWORKED
11292 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11294 /* The return has already been handled
11295 by loading the LR into the PC. */
11302 switch ((int) ARM_FUNC_TYPE (func_type))
11306 /* ??? This is wrong for unified assembly syntax. */
11307 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11310 case ARM_FT_INTERWORKED:
11311 sprintf (instr, "bx%s\t%%|lr", conditional);
11314 case ARM_FT_EXCEPTION:
11315 /* ??? This is wrong for unified assembly syntax. */
11316 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11320 /* Use bx if it's available. */
11321 if (arm_arch5 || arm_arch4t)
11322 sprintf (instr, "bx%s\t%%|lr", conditional);
11324 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11328 output_asm_insn (instr, & operand);
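/* So, for example, a normal ARM function that pushed r4, r5 and lr
   returns with the single instruction

	ldmfd	sp!, {r4, r5, pc}

   popping the saved lr value straight into pc.  */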
11334 /* Write the function name into the code section, directly preceding
11335 the function prologue.
11337 Code will be output similar to this:
11339 .ascii "arm_poke_function_name", 0
11342 .word 0xff000000 + (t1 - t0)
11343 arm_poke_function_name
11345 stmfd sp!, {fp, ip, lr, pc}
11348 When performing a stack backtrace, code can inspect the value
11349 of 'pc' stored at 'fp' + 0. If the trace function then looks
11350 at location pc - 12 and the top 8 bits are set, then we know
11351 that there is a function name embedded immediately preceding this
11352 location, whose length is (pc[-3] & 0x00ffffff).
11354 We assume that pc is declared as a pointer to an unsigned long.
11356 It is of no benefit to output the function name if we are assembling
11357 a leaf function. These function types will not contain a stack
11358 backtrace structure, therefore it is not possible to determine the function name. */
11361 arm_poke_function_name (FILE *stream, const char *name)
11363 unsigned long alignlength;
11364 unsigned long length;
11367 length = strlen (name) + 1;
11368 alignlength = ROUND_UP_WORD (length);
11370 ASM_OUTPUT_ASCII (stream, name, length);
11371 ASM_OUTPUT_ALIGN (stream, 2);
11372 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11373 assemble_aligned_integer (UNITS_PER_WORD, x);
11376 /* Place some comments into the assembler stream
11377 describing the current function. */
11379 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11381 unsigned long func_type;
11385 thumb1_output_function_prologue (f, frame_size);
11389 /* Sanity check. */
11390 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11392 func_type = arm_current_func_type ();
11394 switch ((int) ARM_FUNC_TYPE (func_type))
11397 case ARM_FT_NORMAL:
11399 case ARM_FT_INTERWORKED:
11400 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11403 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11406 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11408 case ARM_FT_EXCEPTION:
11409 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11413 if (IS_NAKED (func_type))
11414 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11416 if (IS_VOLATILE (func_type))
11417 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11419 if (IS_NESTED (func_type))
11420 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11421 if (IS_STACKALIGN (func_type))
11422 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11424 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11426 crtl->args.pretend_args_size, frame_size);
11428 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11429 frame_pointer_needed,
11430 cfun->machine->uses_anonymous_args);
11432 if (cfun->machine->lr_save_eliminated)
11433 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11435 if (crtl->calls_eh_return)
11436 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11438 return_used_this_function = 0;
11442 arm_output_epilogue (rtx sibling)
11445 unsigned long saved_regs_mask;
11446 unsigned long func_type;
11447 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11448 frame that is $fp + 4 for a non-variadic function. */
11449 int floats_offset = 0;
11451 FILE * f = asm_out_file;
11452 unsigned int lrm_count = 0;
11453 int really_return = (sibling == NULL);
11455 arm_stack_offsets *offsets;
11457 /* If we have already generated the return instruction
11458 then it is futile to generate anything else. */
11459 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11462 func_type = arm_current_func_type ();
11464 if (IS_NAKED (func_type))
11465 /* Naked functions don't have epilogues. */
11468 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11472 /* A volatile function should never return. Call abort. */
11473 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11474 assemble_external_libcall (op);
11475 output_asm_insn ("bl\t%a0", &op);
11480 /* If we are throwing an exception, then we really must be doing a
11481 return, so we can't tail-call. */
11482 gcc_assert (!crtl->calls_eh_return || really_return);
11484 offsets = arm_get_frame_offsets ();
11485 saved_regs_mask = offsets->saved_regs_mask;
11488 lrm_count = bit_count (saved_regs_mask);
11490 floats_offset = offsets->saved_args;
11491 /* Compute how far away the floats will be. */
11492 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11493 if (saved_regs_mask & (1 << reg))
11494 floats_offset += 4;
11496 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11498 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11499 int vfp_offset = offsets->frame;
11501 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11503 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11504 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11506 floats_offset += 12;
11507 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11508 reg, FP_REGNUM, floats_offset - vfp_offset);
11513 start_reg = LAST_FPA_REGNUM;
11515 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11517 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11519 floats_offset += 12;
11521 /* We can't unstack more than four registers at once. */
11522 if (start_reg - reg == 3)
11524 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11525 reg, FP_REGNUM, floats_offset - vfp_offset);
11526 start_reg = reg - 1;
11531 if (reg != start_reg)
11532 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11533 reg + 1, start_reg - reg,
11534 FP_REGNUM, floats_offset - vfp_offset);
11535 start_reg = reg - 1;
11539 /* Just in case the last register checked also needs unstacking. */
11540 if (reg != start_reg)
11541 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11542 reg + 1, start_reg - reg,
11543 FP_REGNUM, floats_offset - vfp_offset);
11546 if (TARGET_HARD_FLOAT && TARGET_VFP)
11550 /* The fldmd insns do not have base+offset addressing
11551 modes, so we use IP to hold the address. */
11552 saved_size = arm_get_vfp_saved_size ();
11554 if (saved_size > 0)
11556 floats_offset += saved_size;
11557 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11558 FP_REGNUM, floats_offset - vfp_offset);
11560 start_reg = FIRST_VFP_REGNUM;
11561 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11563 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11564 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11566 if (start_reg != reg)
11567 vfp_output_fldmd (f, IP_REGNUM,
11568 (start_reg - FIRST_VFP_REGNUM) / 2,
11569 (reg - start_reg) / 2);
11570 start_reg = reg + 2;
11573 if (start_reg != reg)
11574 vfp_output_fldmd (f, IP_REGNUM,
11575 (start_reg - FIRST_VFP_REGNUM) / 2,
11576 (reg - start_reg) / 2);
11581 /* The frame pointer is guaranteed to be non-double-word aligned.
11582 This is because it is set to (old_stack_pointer - 4) and the
11583 old_stack_pointer was double word aligned. Thus the offset to
11584 the iWMMXt registers to be loaded must also be non-double-word
11585 sized, so that the resultant address *is* double-word aligned.
11586 We can ignore floats_offset since that was already included in
11587 the live_regs_mask. */
11588 lrm_count += (lrm_count % 2 ? 2 : 1);
11590 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11591 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11593 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11594 reg, FP_REGNUM, lrm_count * 4);
11599 /* saved_regs_mask should contain the IP, which at the time of stack
11600 frame generation actually contains the old stack pointer. So a
11601 quick way to unwind the stack is just to pop the IP register directly
11602 into the stack pointer. */
11603 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11604 saved_regs_mask &= ~ (1 << IP_REGNUM);
11605 saved_regs_mask |= (1 << SP_REGNUM);
11607 /* There are two registers left in saved_regs_mask - LR and PC. We
11608 only need to restore the LR register (the return address), but to
11609 save time we can load it directly into the PC, unless we need a
11610 special function exit sequence, or we are not really returning. */
11611 if (really_return
11612 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11613 && !crtl->calls_eh_return)
11614 /* Delete the LR from the register mask, so that the LR on
11615 the stack is loaded into the PC in the register mask. */
11616 saved_regs_mask &= ~ (1 << LR_REGNUM);
11618 saved_regs_mask &= ~ (1 << PC_REGNUM);
11620 /* We must use SP as the base register, because SP is one of the
11621 registers being restored. If an interrupt or page fault
11622 happens in the ldm instruction, the SP might or might not
11623 have been restored. That would be bad, as then SP will no
11624 longer indicate the safe area of stack, and we can get stack
11625 corruption. Using SP as the base register means that it will
11626 be reset correctly to the original value, should an interrupt
11627 occur. If the stack pointer already points at the right
11628 place, then omit the subtraction. */
11629 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11630 || cfun->calls_alloca)
11631 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11632 4 * bit_count (saved_regs_mask));
11633 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11635 if (IS_INTERRUPT (func_type))
11636 /* Interrupt handlers will have pushed the
11637 IP onto the stack, so restore it now. */
11638 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11642 /* This branch is executed for ARM mode (non-apcs frames) and
11643 Thumb-2 mode. Frame layout is essentially the same for those
11644 cases, except that in ARM mode the frame pointer points to the
11645 first saved register, while in Thumb-2 mode the frame pointer points
11646 to the last saved register.
11648 It is possible to make the frame pointer point to the last saved
11649 register in both cases, and remove some of the conditionals below.
11650 That means that the fp setup in the prologue would be just "mov fp, sp"
11651 and the sp restore in the epilogue would be just "mov sp, fp", whereas
11652 now we have to use add/sub in those cases. However, the value
11653 of that would be marginal, as both mov and add/sub are 32-bit
11654 in ARM mode, and it would require extra conditionals
11655 in arm_expand_prologue to distinguish the ARM-apcs-frame case
11656 (where the frame pointer is required to point at the first register)
11657 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
11658 until a real need arises. */
11659 HOST_WIDE_INT amount;
11661 /* Restore stack pointer if necessary. */
11662 if (TARGET_ARM && frame_pointer_needed)
11664 operands[0] = stack_pointer_rtx;
11665 operands[1] = hard_frame_pointer_rtx;
11667 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
11668 output_add_immediate (operands);
11672 if (frame_pointer_needed)
11674 /* For Thumb-2 restore sp from the frame pointer.
11675 Operand restrictions mean we have to increment FP, then copy to SP. */
11677 amount = offsets->locals_base - offsets->saved_regs;
11678 operands[0] = hard_frame_pointer_rtx;
11682 unsigned long count;
11683 operands[0] = stack_pointer_rtx;
11684 amount = offsets->outgoing_args - offsets->saved_regs;
11685 /* pop call clobbered registers if it avoids a
11686 separate stack adjustment. */
11687 count = offsets->saved_regs - offsets->saved_args;
11690 && !crtl->calls_eh_return
11691 && bit_count(saved_regs_mask) * 4 == count
11692 && !IS_INTERRUPT (func_type)
11693 && !crtl->tail_call_emit)
11695 unsigned long mask;
11696 mask = (1 << (arm_size_return_regs() / 4)) - 1;
11698 mask &= ~saved_regs_mask;
11700 while (bit_count (mask) * 4 > amount)
11702 while ((mask & (1 << reg)) == 0)
11704 mask &= ~(1 << reg);
11706 if (bit_count (mask) * 4 == amount) {
11708 saved_regs_mask |= mask;
11715 operands[1] = operands[0];
11716 operands[2] = GEN_INT (amount);
11717 output_add_immediate (operands);
11719 if (frame_pointer_needed)
11720 asm_fprintf (f, "\tmov\t%r, %r\n",
11721 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11724 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11726 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11727 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11728 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11733 start_reg = FIRST_FPA_REGNUM;
11735 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11737 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11739 if (reg - start_reg == 3)
11741 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11742 start_reg, SP_REGNUM);
11743 start_reg = reg + 1;
11748 if (reg != start_reg)
11749 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11750 start_reg, reg - start_reg,
11753 start_reg = reg + 1;
11757 /* Just in case the last register checked also needs unstacking. */
11758 if (reg != start_reg)
11759 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11760 start_reg, reg - start_reg, SP_REGNUM);
11763 if (TARGET_HARD_FLOAT && TARGET_VFP)
11765 start_reg = FIRST_VFP_REGNUM;
11766 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11768 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11769 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11771 if (start_reg != reg)
11772 vfp_output_fldmd (f, SP_REGNUM,
11773 (start_reg - FIRST_VFP_REGNUM) / 2,
11774 (reg - start_reg) / 2);
11775 start_reg = reg + 2;
11778 if (start_reg != reg)
11779 vfp_output_fldmd (f, SP_REGNUM,
11780 (start_reg - FIRST_VFP_REGNUM) / 2,
11781 (reg - start_reg) / 2);
11784 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11785 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11786 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11788 /* If we can, restore the LR into the PC. */
11789 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11790 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11791 && !IS_STACKALIGN (func_type)
11793 && crtl->args.pretend_args_size == 0
11794 && saved_regs_mask & (1 << LR_REGNUM)
11795 && !crtl->calls_eh_return)
11797 saved_regs_mask &= ~ (1 << LR_REGNUM);
11798 saved_regs_mask |= (1 << PC_REGNUM);
11799 rfe = IS_INTERRUPT (func_type);
11804 /* Load the registers off the stack. If we only have one register
11805 to load use the LDR instruction - it is faster. For Thumb-2
11806 always use pop and the assembler will pick the best instruction. */
11807 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11808 && !IS_INTERRUPT(func_type))
11810 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11812 else if (saved_regs_mask)
11814 if (saved_regs_mask & (1 << SP_REGNUM))
11815 /* Note - write back to the stack register is not enabled
11816 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11817 in the list of registers and if we add writeback the
11818 instruction becomes UNPREDICTABLE. */
11819 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11821 else if (TARGET_ARM)
11822 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11825 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11828 if (crtl->args.pretend_args_size)
11830 /* Unwind the pre-pushed regs. */
11831 operands[0] = operands[1] = stack_pointer_rtx;
11832 operands[2] = GEN_INT (crtl->args.pretend_args_size);
11833 output_add_immediate (operands);
11837 /* We may have already restored PC directly from the stack. */
11838 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11839 return "";
11841 /* Stack adjustment for exception handler. */
11842 if (crtl->calls_eh_return)
11843 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11844 ARM_EH_STACKADJ_REGNUM);
11846 /* Generate the return instruction. */
11847 switch ((int) ARM_FUNC_TYPE (func_type))
11851 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11854 case ARM_FT_EXCEPTION:
11855 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11858 case ARM_FT_INTERWORKED:
11859 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11863 if (IS_STACKALIGN (func_type))
11865 /* See comment in arm_expand_prologue. */
11866 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11868 if (arm_arch5 || arm_arch4t)
11869 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11871 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
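/* Illustration: for a non-variadic APCS function that saved
   {fp, ip, lr, pc} the code above typically emits

	sub	sp, fp, #12
	ldmfd	sp, {fp, sp, pc}

   reloading the old SP from IP's slot and returning by loading the
   saved LR straight into the PC (the sub is omitted when SP already
   points at the saved registers).  */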
11879 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11880 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11882 arm_stack_offsets *offsets;
11888 /* Emit any call-via-reg trampolines that are needed for v4t support
11889 of call_reg and call_value_reg type insns. */
11890 for (regno = 0; regno < LR_REGNUM; regno++)
11892 rtx label = cfun->machine->call_via[regno];
11896 switch_to_section (function_section (current_function_decl));
11897 targetm.asm_out.internal_label (asm_out_file, "L",
11898 CODE_LABEL_NUMBER (label));
11899 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11903 /* ??? Probably not safe to set this here, since it assumes that a
11904 function will be emitted as assembly immediately after we generate
11905 RTL for it. This does not happen for inline functions. */
11906 return_used_this_function = 0;
11908 else /* TARGET_32BIT */
11910 /* We need to take into account any stack-frame rounding. */
11911 offsets = arm_get_frame_offsets ();
11913 gcc_assert (!use_return_insn (FALSE, NULL)
11914 || !return_used_this_function
11915 || offsets->saved_regs == offsets->outgoing_args
11916 || frame_pointer_needed);
11918 /* Reset the ARM-specific per-function variables. */
11919 after_arm_reorg = 0;
11923 /* Generate and emit an insn that we will recognize as a push_multi.
11924 Unfortunately, since this insn does not reflect very well the actual
11925 semantics of the operation, we need to annotate the insn for the benefit
11926 of DWARF2 frame unwind information. */
11928 emit_multi_reg_push (unsigned long mask)
11931 int num_dwarf_regs;
11935 int dwarf_par_index;
11938 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11939 if (mask & (1 << i))
11942 gcc_assert (num_regs && num_regs <= 16);
11944 /* We don't record the PC in the dwarf frame information. */
11945 num_dwarf_regs = num_regs;
11946 if (mask & (1 << PC_REGNUM))
11947 num_dwarf_regs--;
11949 /* For the body of the insn we are going to generate an UNSPEC in
11950 parallel with several USEs. This allows the insn to be recognized
11951 by the push_multi pattern in the arm.md file. The insn looks
11952 something like this:
11955 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11956 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11957 (use (reg:SI 11 fp))
11958 (use (reg:SI 12 ip))
11959 (use (reg:SI 14 lr))
11960 (use (reg:SI 15 pc))
11963 For the frame note however, we try to be more explicit and actually
11964 show each register being stored into the stack frame, plus a (single)
11965 decrement of the stack pointer. We do it this way in order to be
11966 friendly to the stack unwinding code, which only wants to see a single
11967 stack decrement per instruction. The RTL we generate for the note looks
11968 something like this:
11971 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11972 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11973 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11974 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11975 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11978 This sequence is used both by the code to support stack unwinding for
11979 exception handlers and the code to generate dwarf2 frame debugging. */
11981 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11982 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11983 dwarf_par_index = 1;
11985 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11987 if (mask & (1 << i))
11989 reg = gen_rtx_REG (SImode, i);
11991 XVECEXP (par, 0, 0)
11992 = gen_rtx_SET (VOIDmode,
11993 gen_frame_mem (BLKmode,
11994 gen_rtx_PRE_DEC (BLKmode,
11995 stack_pointer_rtx)),
11996 gen_rtx_UNSPEC (BLKmode,
11997 gen_rtvec (1, reg),
11998 UNSPEC_PUSH_MULT));
12000 if (i != PC_REGNUM)
12002 tmp = gen_rtx_SET (VOIDmode,
12003 gen_frame_mem (SImode, stack_pointer_rtx),
12005 RTX_FRAME_RELATED_P (tmp) = 1;
12006 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
12014 for (j = 1, i++; j < num_regs; i++)
12016 if (mask & (1 << i))
12018 reg = gen_rtx_REG (SImode, i);
12020 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
12022 if (i != PC_REGNUM)
12025 = gen_rtx_SET (VOIDmode,
12026 gen_frame_mem (SImode,
12027 plus_constant (stack_pointer_rtx,
12030 RTX_FRAME_RELATED_P (tmp) = 1;
12031 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
12038 par = emit_insn (par);
12040 tmp = gen_rtx_SET (VOIDmode,
12042 plus_constant (stack_pointer_rtx, -4 * num_regs));
12043 RTX_FRAME_RELATED_P (tmp) = 1;
12044 XVECEXP (dwarf, 0, 0) = tmp;
12046 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12047 REG_NOTES (par));
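/* Usage sketch: the push shown in the comment above would be requested
   as

     emit_multi_reg_push ((1 << 4) | (1 << 11) | (1 << IP_REGNUM)
			  | (1 << LR_REGNUM) | (1 << PC_REGNUM));

   i.e. one mask bit per core register to be stored.  */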
12051 /* Calculate the size of the return value that is passed in registers. */
12053 arm_size_return_regs (void)
12055 enum machine_mode mode;
12057 if (crtl->return_rtx != 0)
12058 mode = GET_MODE (crtl->return_rtx);
12060 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12062 return GET_MODE_SIZE (mode);
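/* For example, a function returning "long long" has a DImode return
   rtx, so this reports 8 bytes (the value comes back in r0 and r1).  */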
12066 emit_sfm (int base_reg, int count)
12073 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12074 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12076 reg = gen_rtx_REG (XFmode, base_reg++);
12078 XVECEXP (par, 0, 0)
12079 = gen_rtx_SET (VOIDmode,
12080 gen_frame_mem (BLKmode,
12081 gen_rtx_PRE_DEC (BLKmode,
12082 stack_pointer_rtx)),
12083 gen_rtx_UNSPEC (BLKmode,
12084 gen_rtvec (1, reg),
12085 UNSPEC_PUSH_MULT));
12086 tmp = gen_rtx_SET (VOIDmode,
12087 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
12088 RTX_FRAME_RELATED_P (tmp) = 1;
12089 XVECEXP (dwarf, 0, 1) = tmp;
12091 for (i = 1; i < count; i++)
12093 reg = gen_rtx_REG (XFmode, base_reg++);
12094 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12096 tmp = gen_rtx_SET (VOIDmode,
12097 gen_frame_mem (XFmode,
12098 plus_constant (stack_pointer_rtx,
12101 RTX_FRAME_RELATED_P (tmp) = 1;
12102 XVECEXP (dwarf, 0, i + 1) = tmp;
12105 tmp = gen_rtx_SET (VOIDmode,
12107 plus_constant (stack_pointer_rtx, -12 * count));
12109 RTX_FRAME_RELATED_P (tmp) = 1;
12110 XVECEXP (dwarf, 0, 0) = tmp;
12112 par = emit_insn (par);
12113 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12114 REG_NOTES (par));
12119 /* Return true if the current function needs to save/restore LR. */
12122 thumb_force_lr_save (void)
12124 return !cfun->machine->lr_save_eliminated
12125 && (!leaf_function_p ()
12126 || thumb_far_jump_used_p ()
12127 || df_regs_ever_live_p (LR_REGNUM));
12131 /* Compute the distance from register FROM to register TO.
12132 These can be the arg pointer (26), the soft frame pointer (25),
12133 the stack pointer (13) or the hard frame pointer (11).
12134 In thumb mode r7 is used as the soft frame pointer, if needed.
12135 Typical stack layout looks like this:
12137       old stack pointer -> |    |
12138                             ----
12139                            |    | \
12140                            |    |   saved arguments for
12141                            |    |   vararg functions
12142                            |    | /
12143                              --
12144   hard FP & arg pointer -> |    | \
12145                            |    |   stack
12146                            |    |   frame
12147                            |    | /
12148                              --
12149                            |    | \
12150                            |    |   call saved
12151                            |    |   registers
12152      soft frame pointer -> |    | /
12153                              --
12154                            |    | \
12155                            |    |   local
12156                            |    |   variables
12157     locals base pointer -> |    | /
12158                              --
12159                            |    | \
12160                            |    |   outgoing
12161                            |    |   arguments
12162   current stack pointer -> |    | /
12163                              --
12165 For a given function some or all of these stack components
12166 may not be needed, giving rise to the possibility of
12167 eliminating some of the registers.
12169 The values returned by this function must reflect the behavior
12170 of arm_expand_prologue() and arm_compute_save_reg_mask().
12172 The sign of the number returned reflects the direction of stack
12173 growth, so the values are positive for all eliminations except
12174 from the soft frame pointer to the hard frame pointer.
12176 SFP may point just inside the local variables block to ensure correct alignment. */
12180 /* Calculate stack offsets. These are used to calculate register elimination
12181 offsets and in prologue/epilogue code. Also calculates which registers
12182 should be saved. */
12184 static arm_stack_offsets *
12185 arm_get_frame_offsets (void)
12187 struct arm_stack_offsets *offsets;
12188 unsigned long func_type;
12192 HOST_WIDE_INT frame_size;
12195 offsets = &cfun->machine->stack_offsets;
12197 /* We need to know if we are a leaf function. Unfortunately, it
12198 is possible to be called after start_sequence has been called,
12199 which causes get_insns to return the insns for the sequence,
12200 not the function, which will cause leaf_function_p to return
12201 the incorrect result.
12203 We only need to know about leaf functions once reload has completed, and the
12204 frame size cannot be changed after that time, so we can safely
12205 use the cached value. */
12207 if (reload_completed)
12210 /* Initially this is the size of the local variables. It will be translated
12211 into an offset once we have determined the size of preceding data. */
12212 frame_size = ROUND_UP_WORD (get_frame_size ());
12214 leaf = leaf_function_p ();
12216 /* Space for variadic functions. */
12217 offsets->saved_args = crtl->args.pretend_args_size;
12219 /* In Thumb mode this is incorrect, but never used. */
12220 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
12221 arm_compute_static_chain_stack_bytes();
12225 unsigned int regno;
12227 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12228 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12229 saved = core_saved;
12231 /* We know that SP will be doubleword aligned on entry, and we must
12232 preserve that condition at any subroutine call. We also require the
12233 soft frame pointer to be doubleword aligned. */
12235 if (TARGET_REALLY_IWMMXT)
12237 /* Check for the call-saved iWMMXt registers. */
12238 for (regno = FIRST_IWMMXT_REGNUM;
12239 regno <= LAST_IWMMXT_REGNUM;
12241 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12245 func_type = arm_current_func_type ();
12246 if (! IS_VOLATILE (func_type))
12248 /* Space for saved FPA registers. */
12249 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12250 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12253 /* Space for saved VFP registers. */
12254 if (TARGET_HARD_FLOAT && TARGET_VFP)
12255 saved += arm_get_vfp_saved_size ();
12258 else /* TARGET_THUMB1 */
12260 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12261 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12262 saved = core_saved;
12263 if (TARGET_BACKTRACE)
12267 /* Saved registers include the stack frame. */
12268 offsets->saved_regs = offsets->saved_args + saved +
12269 arm_compute_static_chain_stack_bytes();
12270 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12271 /* A leaf function does not need any stack alignment if it has nothing on the stack. */
12273 if (leaf && frame_size == 0)
12275 offsets->outgoing_args = offsets->soft_frame;
12276 offsets->locals_base = offsets->soft_frame;
12280 /* Ensure SFP has the correct alignment. */
12281 if (ARM_DOUBLEWORD_ALIGN
12282 && (offsets->soft_frame & 7))
12284 offsets->soft_frame += 4;
12285 /* Try to align stack by pushing an extra reg. Don't bother doing this
12286 when there is a stack frame as the alignment will be rolled into
12287 the normal stack adjustment. */
12288 if (frame_size + crtl->outgoing_args_size == 0)
12292 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12294 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12301 if (reg == -1 && arm_size_return_regs () <= 12
12302 && !crtl->tail_call_emit)
12304 /* Push/pop an argument register (r3) if all callee saved
12305 registers are already being pushed. */
12311 offsets->saved_regs += 4;
12312 offsets->saved_regs_mask |= (1 << reg);
12317 offsets->locals_base = offsets->soft_frame + frame_size;
12318 offsets->outgoing_args = (offsets->locals_base
12319 + crtl->outgoing_args_size);
12321 if (ARM_DOUBLEWORD_ALIGN)
12323 /* Ensure SP remains doubleword aligned. */
12324 if (offsets->outgoing_args & 7)
12325 offsets->outgoing_args += 4;
12326 gcc_assert (!(offsets->outgoing_args & 7));
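/* Worked example (illustrative; TARGET_ARM, no interworking slot,
   doubleword alignment): pushing {r4, fp, lr} (12 bytes) with 8 bytes
   of locals and no outgoing or pretend args gives saved_args = 0,
   saved_regs = 12, soft_frame = 16 (padded up from 12),
   locals_base = 24 and outgoing_args = 24, which is 8-byte aligned as
   required.  */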
12333 /* Calculate the relative offsets for the different stack pointers. Positive
12334 offsets are in the direction of stack growth. */
12337 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12339 arm_stack_offsets *offsets;
12341 offsets = arm_get_frame_offsets ();
12343 /* OK, now we have enough information to compute the distances.
12344 There must be an entry in these switch tables for each pair
12345 of registers in ELIMINABLE_REGS, even if some of the entries
12346 seem to be redundant or useless. */
12348 switch (from)
12349 case ARG_POINTER_REGNUM:
12350 switch (to)
12352 case THUMB_HARD_FRAME_POINTER_REGNUM:
12353 return 0;
12355 case FRAME_POINTER_REGNUM:
12356 /* This is the reverse of the soft frame pointer
12357 to hard frame pointer elimination below. */
12358 return offsets->soft_frame - offsets->saved_args;
12360 case ARM_HARD_FRAME_POINTER_REGNUM:
12361 /* This is only non-zero in the case where the static chain register
12362 is stored above the frame. */
12363 return offsets->frame - offsets->saved_args - 4;
12365 case STACK_POINTER_REGNUM:
12366 /* If nothing has been pushed on the stack at all
12367 then this will return -4. This *is* correct! */
12368 return offsets->outgoing_args - (offsets->saved_args + 4);
12371 gcc_unreachable ();
12373 gcc_unreachable ();
12375 case FRAME_POINTER_REGNUM:
12376 switch (to)
12378 case THUMB_HARD_FRAME_POINTER_REGNUM:
12379 return 0;
12381 case ARM_HARD_FRAME_POINTER_REGNUM:
12382 /* The hard frame pointer points to the top entry in the
12383 stack frame. The soft frame pointer to the bottom entry
12384 in the stack frame. If there is no stack frame at all,
12385 then they are identical. */
12387 return offsets->frame - offsets->soft_frame;
12389 case STACK_POINTER_REGNUM:
12390 return offsets->outgoing_args - offsets->soft_frame;
12393 gcc_unreachable ();
12395 gcc_unreachable ();
12398 /* You cannot eliminate from the stack pointer.
12399 In theory you could eliminate from the hard frame
12400 pointer to the stack pointer, but this will never
12401 happen, since if a stack frame is not needed the
12402 hard frame pointer will never be used. */
12403 gcc_unreachable ();
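/* Continuing the worked example above (saved_args = 0,
   soft_frame = 16, outgoing_args = 24): eliminating the arg pointer to
   the stack pointer yields 24 - (0 + 4) = 20, and the soft frame
   pointer to the stack pointer yields 24 - 16 = 8.  */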
12408 /* Emit RTL to save coprocessor registers on function entry. Returns the
12409 number of bytes pushed. */
12412 arm_save_coproc_regs(void)
12414 int saved_size = 0;
12416 unsigned start_reg;
12419 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12420 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12422 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12423 insn = gen_rtx_MEM (V2SImode, insn);
12424 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12425 RTX_FRAME_RELATED_P (insn) = 1;
12429 /* Save any floating point call-saved registers used by this function. */
12431 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12433 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12434 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12436 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12437 insn = gen_rtx_MEM (XFmode, insn);
12438 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12439 RTX_FRAME_RELATED_P (insn) = 1;
12445 start_reg = LAST_FPA_REGNUM;
12447 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12449 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12451 if (start_reg - reg == 3)
12453 insn = emit_sfm (reg, 4);
12454 RTX_FRAME_RELATED_P (insn) = 1;
12456 start_reg = reg - 1;
12461 if (start_reg != reg)
12463 insn = emit_sfm (reg + 1, start_reg - reg);
12464 RTX_FRAME_RELATED_P (insn) = 1;
12465 saved_size += (start_reg - reg) * 12;
12467 start_reg = reg - 1;
12471 if (start_reg != reg)
12473 insn = emit_sfm (reg + 1, start_reg - reg);
12474 saved_size += (start_reg - reg) * 12;
12475 RTX_FRAME_RELATED_P (insn) = 1;
12478 if (TARGET_HARD_FLOAT && TARGET_VFP)
12480 start_reg = FIRST_VFP_REGNUM;
12482 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12484 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12485 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12487 if (start_reg != reg)
12488 saved_size += vfp_emit_fstmd (start_reg,
12489 (reg - start_reg) / 2);
12490 start_reg = reg + 2;
12493 if (start_reg != reg)
12494 saved_size += vfp_emit_fstmd (start_reg,
12495 (reg - start_reg) / 2);
12501 /* Set the Thumb frame pointer from the stack pointer. */
12504 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12506 HOST_WIDE_INT amount;
12509 amount = offsets->outgoing_args - offsets->locals_base;
12511 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12512 stack_pointer_rtx, GEN_INT (amount)));
12515 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12516 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12517 expects the first two operands to be the same. */
12520 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12522 hard_frame_pointer_rtx));
12526 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12527 hard_frame_pointer_rtx,
12528 stack_pointer_rtx));
12530 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12531 plus_constant (stack_pointer_rtx, amount));
12532 RTX_FRAME_RELATED_P (dwarf) = 1;
12533 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12537 RTX_FRAME_RELATED_P (insn) = 1;
12540 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12543 arm_expand_prologue (void)
12548 unsigned long live_regs_mask;
12549 unsigned long func_type;
12551 int saved_pretend_args = 0;
12552 int saved_regs = 0;
12553 unsigned HOST_WIDE_INT args_to_push;
12554 arm_stack_offsets *offsets;
12556 func_type = arm_current_func_type ();
12558 /* Naked functions don't have prologues. */
12559 if (IS_NAKED (func_type))
12562 /* Make a copy of crtl->args.pretend_args_size (c_f_p_a_s) as we may need to modify it locally. */
12563 args_to_push = crtl->args.pretend_args_size;
12565 /* Compute which registers we will have to save onto the stack. */
12566 offsets = arm_get_frame_offsets ();
12567 live_regs_mask = offsets->saved_regs_mask;
12569 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12571 if (IS_STACKALIGN (func_type))
12576 /* Handle a word-aligned stack pointer. We generate the following:
12578 mov r0, sp
12579 bic r1, r0, #7
12580 mov sp, r1
12581 <save and restore r0 in normal prologue/epilogue>
12583 mov sp, r0
12585 The unwinder doesn't need to know about the stack realignment.
12586 Just tell it we saved SP in r0. */
12587 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12589 r0 = gen_rtx_REG (SImode, 0);
12590 r1 = gen_rtx_REG (SImode, 1);
12591 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
12592 compiler won't choke. */
12593 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
12594 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12595 insn = gen_movsi (r0, stack_pointer_rtx);
12596 RTX_FRAME_RELATED_P (insn) = 1;
12597 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12598 dwarf, REG_NOTES (insn));
12600 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12601 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12604 /* For APCS frames, if the IP register is clobbered when creating the
12605 stack frame, save that register in a special way. */
12607 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12609 if (IS_INTERRUPT (func_type))
12611 /* Interrupt functions must not corrupt any registers.
12612 Creating a frame pointer however, corrupts the IP
12613 register, so we must push it first. */
12614 insn = emit_multi_reg_push (1 << IP_REGNUM);
12616 /* Do not set RTX_FRAME_RELATED_P on this insn.
12617 The dwarf stack unwinding code only wants to see one
12618 stack decrement per function, and this is not it. If
12619 this instruction is labeled as being part of the frame
12620 creation sequence then dwarf2out_frame_debug_expr will
12621 die when it encounters the assignment of IP to FP
12622 later on, since the use of SP here establishes SP as
12623 the CFA register and not IP.
12625 Anyway this instruction is not really part of the stack
12626 frame creation although it is part of the prologue. */
12628 else if (IS_NESTED (func_type))
12630 /* The Static chain register is the same as the IP register
12631 used as a scratch register during stack frame creation.
12632 To get around this we need to find somewhere to store IP
12633 whilst the frame is being created. We try the following
12636 1. The last argument register.
12637 2. A slot on the stack above the frame. (This only
12638 works if the function is not a varargs function).
12639 3. Register r3, after pushing the argument registers onto the stack.
12642 Note - we only need to tell the dwarf2 backend about the SP
12643 adjustment in the second variant; the static chain register
12644 doesn't need to be unwound, as it doesn't contain a value
12645 inherited from the caller. */
12647 if (df_regs_ever_live_p (3) == false)
12648 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12649 else if (args_to_push == 0)
12651 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
12656 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12657 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12660 /* Just tell the dwarf backend that we adjusted SP. */
12661 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12662 plus_constant (stack_pointer_rtx,
12664 RTX_FRAME_RELATED_P (insn) = 1;
12665 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12666 dwarf, REG_NOTES (insn));
12670 /* Store the args on the stack. */
12671 if (cfun->machine->uses_anonymous_args)
12672 insn = emit_multi_reg_push
12673 ((0xf0 >> (args_to_push / 4)) & 0xf);
12676 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12677 GEN_INT (- args_to_push)));
12679 RTX_FRAME_RELATED_P (insn) = 1;
12681 saved_pretend_args = 1;
12682 fp_offset = args_to_push;
12685 /* Now reuse r3 to preserve IP. */
12686 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12690 insn = emit_set_insn (ip_rtx,
12691 plus_constant (stack_pointer_rtx, fp_offset));
12692 RTX_FRAME_RELATED_P (insn) = 1;
12697 /* Push the argument registers, or reserve space for them. */
12698 if (cfun->machine->uses_anonymous_args)
12699 insn = emit_multi_reg_push
12700 ((0xf0 >> (args_to_push / 4)) & 0xf);
12703 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12704 GEN_INT (- args_to_push)));
12705 RTX_FRAME_RELATED_P (insn) = 1;
12708 /* If this is an interrupt service routine, and the link register
12709 is going to be pushed, and we're not generating an extra
12710 push of IP (needed when a frame is needed and the frame layout is APCS),
12711 subtracting four from LR now will mean that the function return
12712 can be done with a single instruction. */
12713 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12714 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12715 && !(frame_pointer_needed && TARGET_APCS_FRAME)
12718 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12720 emit_set_insn (lr, plus_constant (lr, -4));
12723 if (live_regs_mask)
12725 saved_regs += bit_count (live_regs_mask) * 4;
12726 if (optimize_size && !frame_pointer_needed
12727 && saved_regs == offsets->saved_regs - offsets->saved_args)
12729 /* If no coprocessor registers are being pushed and we don't have
12730 to worry about a frame pointer then push extra registers to
12731 create the stack frame. This is done in a way that does not
12732 alter the frame layout, so is independent of the epilogue. */
12736 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
12738 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
12739 if (frame && n * 4 >= frame)
12742 live_regs_mask |= (1 << n) - 1;
12743 saved_regs += frame;
12746 insn = emit_multi_reg_push (live_regs_mask);
12747 RTX_FRAME_RELATED_P (insn) = 1;
12750 if (! IS_VOLATILE (func_type))
12751 saved_regs += arm_save_coproc_regs ();
12753 if (frame_pointer_needed && TARGET_ARM)
12755 /* Create the new frame pointer. */
12756 if (TARGET_APCS_FRAME)
12758 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12759 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12760 RTX_FRAME_RELATED_P (insn) = 1;
12762 if (IS_NESTED (func_type))
12764 /* Recover the static chain register. */
12765 if (!df_regs_ever_live_p (3)
12766 || saved_pretend_args)
12767 insn = gen_rtx_REG (SImode, 3);
12768 else /* if (crtl->args.pretend_args_size == 0) */
12770 insn = plus_constant (hard_frame_pointer_rtx, 4);
12771 insn = gen_frame_mem (SImode, insn);
12773 emit_set_insn (ip_rtx, insn);
12774 /* Add a USE to stop propagate_one_insn() from barfing. */
12775 emit_insn (gen_prologue_use (ip_rtx));
12780 insn = GEN_INT (saved_regs - 4);
12781 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12782 stack_pointer_rtx, insn));
12783 RTX_FRAME_RELATED_P (insn) = 1;
12787 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12789 /* This add can produce multiple insns for a large constant, so we
12790 need to get tricky. */
12791 rtx last = get_last_insn ();
12793 amount = GEN_INT (offsets->saved_args + saved_regs
12794 - offsets->outgoing_args);
12796 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12797 amount));
12798 do
12800 last = last ? NEXT_INSN (last) : get_insns ();
12801 RTX_FRAME_RELATED_P (last) = 1;
12803 while (last != insn);
12805 /* If the frame pointer is needed, emit a special barrier that
12806 will prevent the scheduler from moving stores to the frame
12807 before the stack adjustment. */
12808 if (frame_pointer_needed)
12809 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12810 hard_frame_pointer_rtx));
12814 if (frame_pointer_needed && TARGET_THUMB2)
12815 thumb_set_frame_pointer (offsets);
12817 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12819 unsigned long mask;
12821 mask = live_regs_mask;
12822 mask &= THUMB2_WORK_REGS;
12823 if (!IS_NESTED (func_type))
12824 mask |= (1 << IP_REGNUM);
12825 arm_load_pic_register (mask);
12828 /* If we are profiling, make sure no instructions are scheduled before
12829 the call to mcount. Similarly if the user has requested no
12830 scheduling in the prologue. Similarly if we want non-call exceptions
12831 using the EABI unwinder, to prevent faulting instructions from being
12832 swapped with a stack adjustment. */
12833 if (crtl->profile || !TARGET_SCHED_PROLOG
12834 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12835 emit_insn (gen_blockage ());
12837 /* If the link register is being kept alive, with the return address in it,
12838 then make sure that it does not get reused by the ce2 pass. */
12839 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12840 cfun->machine->lr_save_eliminated = 1;
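/* Illustration: for a non-nested APCS varargs function the sequence
   constructed above typically assembles to

	mov	ip, sp
	stmfd	sp!, {r0-r3}
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #20
	sub	sp, sp, #<locals + outgoing args>

   (exact registers and constants depend on the frame).  */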
12843 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12845 arm_print_condition (FILE *stream)
12847 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12849 /* Branch conversion is not implemented for Thumb-2. */
12852 output_operand_lossage ("predicated Thumb instruction");
12855 if (current_insn_predicate != NULL)
12857 output_operand_lossage
12858 ("predicated instruction in conditional sequence");
12862 fputs (arm_condition_codes[arm_current_cc], stream);
12864 else if (current_insn_predicate)
12866 enum arm_cond_code code;
12870 output_operand_lossage ("predicated Thumb instruction");
12874 code = get_arm_condition_code (current_insn_predicate);
12875 fputs (arm_condition_codes[code], stream);
12880 /* If CODE is 'd', then the X is a condition operand and the instruction
12881 should only be executed if the condition is true.
12882 if CODE is 'D', then the X is a condition operand and the instruction
12883 should only be executed if the condition is false: however, if the mode
12884 of the comparison is CCFPEmode, then always execute the instruction -- we
12885 do this because in these circumstances !GE does not necessarily imply LT;
12886 in these cases the instruction pattern will take care to make sure that
12887 an instruction containing %d will follow, thereby undoing the effects of
12888 doing this instruction unconditionally.
12889 If CODE is 'N' then X is a floating point operand that must be negated
12891 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12892 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12894 arm_print_operand (FILE *stream, rtx x, int code)
12899 fputs (ASM_COMMENT_START, stream);
12903 fputs (user_label_prefix, stream);
12907 fputs (REGISTER_PREFIX, stream);
12911 arm_print_condition (stream);
12915 /* Nothing in unified syntax, otherwise the current condition code. */
12916 if (!TARGET_UNIFIED_ASM)
12917 arm_print_condition (stream);
12921 /* The current condition code in unified syntax, otherwise nothing. */
12922 if (TARGET_UNIFIED_ASM)
12923 arm_print_condition (stream);
12927 /* The current condition code for a condition code setting instruction.
12928 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12929 if (TARGET_UNIFIED_ASM)
12931 fputc('s', stream);
12932 arm_print_condition (stream);
12936 arm_print_condition (stream);
12937 fputc('s', stream);
12942 /* If the instruction is conditionally executed then print
12943 the current condition code, otherwise print 's'. */
12944 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12945 if (current_insn_predicate)
12946 arm_print_condition (stream);
12948 fputc('s', stream);
12951 /* %# is a "break" sequence. It doesn't output anything, but is used to
12952 separate e.g. operand numbers from following text, if that text consists
12953 of further digits which we don't want to be part of the operand number. */
12961 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12962 r = REAL_VALUE_NEGATE (r);
12963 fprintf (stream, "%s", fp_const_from_val (&r));
12967 /* An integer or symbol address without a preceding # sign. */
12969 switch (GET_CODE (x))
12972 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12976 output_addr_const (stream, x);
12980 gcc_unreachable ();
12985 if (GET_CODE (x) == CONST_INT)
12988 val = ARM_SIGN_EXTEND (~INTVAL (x));
12989 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12993 putc ('~', stream);
12994 output_addr_const (stream, x);
12999 /* The low 16 bits of an immediate constant. */
13000 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
13004 fprintf (stream, "%s", arithmetic_instr (x, 1));
13007 /* Truncate Cirrus shift counts. */
13009 if (GET_CODE (x) == CONST_INT)
13011 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
13014 arm_print_operand (stream, x, 0);
13018 fprintf (stream, "%s", arithmetic_instr (x, 0));
13026 if (!shift_operator (x, SImode))
13028 output_operand_lossage ("invalid shift operand");
13032 shift = shift_op (x, &val);
13036 fprintf (stream, ", %s ", shift);
13038 arm_print_operand (stream, XEXP (x, 1), 0);
13040 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
13045 /* An explanation of the 'Q', 'R' and 'H' register operands:
13047 In a pair of registers containing a DI or DF value the 'Q'
13048 operand returns the register number of the register containing
13049 the least significant part of the value. The 'R' operand returns
13050 the register number of the register containing the most
13051 significant part of the value.
13053 The 'H' operand returns the higher of the two register numbers.
13054 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
13055 same as the 'Q' operand, since the most significant part of the
13056 value is held in the lower number register. The reverse is true
13057 on systems where WORDS_BIG_ENDIAN is false.
13059 The purpose of these operands is to distinguish between cases
13060 where the endian-ness of the values is important (for example
13061 when they are added together), and cases where the endian-ness
13062 is irrelevant, but the order of register operations is important.
13063 For example when loading a value from memory into a register
13064 pair, the endian-ness does not matter. Provided that the value
13065 from the lower memory address is put into the lower numbered
13066 register, and the value from the higher address is put into the
13067 higher numbered register, the load will work regardless of whether
13068 the value being loaded is big-wordian or little-wordian. The
13069 order of the two register loads can matter however, if the address
13070 of the memory location is actually held in one of the registers
13071 being overwritten by the load. */
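/* For example, with a little-endian DImode value in r0:r1, "%Q0"
   prints r0 (least significant half), "%R0" prints r1 (most
   significant half) and "%H0" prints r1, the higher register number,
   regardless of endianness.  */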
13073 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13075 output_operand_lossage ("invalid operand for code '%c'", code);
13079 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
13083 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13085 output_operand_lossage ("invalid operand for code '%c'", code);
13089 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
13093 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13095 output_operand_lossage ("invalid operand for code '%c'", code);
13099 asm_fprintf (stream, "%r", REGNO (x) + 1);
13103 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13105 output_operand_lossage ("invalid operand for code '%c'", code);
13109 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
13113 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13115 output_operand_lossage ("invalid operand for code '%c'", code);
13119 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13123 asm_fprintf (stream, "%r",
13124 GET_CODE (XEXP (x, 0)) == REG
13125 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13129 asm_fprintf (stream, "{%r-%r}",
13131 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
13134 /* Like 'M', but writing doubleword vector registers, for use by Neon
13138 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13139 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13141 asm_fprintf (stream, "{d%d}", regno);
13143 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13148 /* CONST_TRUE_RTX means always -- that's the default. */
13149 if (x == const_true_rtx)
13152 if (!COMPARISON_P (x))
13154 output_operand_lossage ("invalid operand for code '%c'", code);
13158 fputs (arm_condition_codes[get_arm_condition_code (x)],
13163 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13164 want to do that. */
13165 if (x == const_true_rtx)
13167 output_operand_lossage ("instruction never executed");
13170 if (!COMPARISON_P (x))
13172 output_operand_lossage ("invalid operand for code '%c'", code);
13176 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13177 (get_arm_condition_code (x))],
13181 /* Cirrus registers can be accessed in a variety of ways:
13182 single floating point (f)
13183 double floating point (d)
13184 32bit integer (fx)
13185 64bit integer (dx). */
13186 case 'W': /* Cirrus register in F mode. */
13187 case 'X': /* Cirrus register in D mode. */
13188 case 'Y': /* Cirrus register in FX mode. */
13189 case 'Z': /* Cirrus register in DX mode. */
13190 gcc_assert (GET_CODE (x) == REG
13191 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13193 fprintf (stream, "mv%s%s",
13195 : code == 'X' ? "d"
13196 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13200 /* Print cirrus register in the mode specified by the register's mode. */
13203 int mode = GET_MODE (x);
13205 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13207 output_operand_lossage ("invalid operand for code '%c'", code);
13211 fprintf (stream, "mv%s%s",
13212 mode == DFmode ? "d"
13213 : mode == SImode ? "fx"
13214 : mode == DImode ? "dx"
13215 : "f", reg_names[REGNO (x)] + 2);
13221 if (GET_CODE (x) != REG
13222 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13223 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13224 /* Bad value for wCG register number. */
13226 output_operand_lossage ("invalid operand for code '%c'", code);
13231 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13234 /* Print an iWMMXt control register name. */
13236 if (GET_CODE (x) != CONST_INT
13237 || INTVAL (x) < 0
13238 || INTVAL (x) >= 16)
13239 /* Bad value for wC register number. */
13241 output_operand_lossage ("invalid operand for code '%c'", code);
13247 static const char * wc_reg_names [16] =
13249 "wCID", "wCon", "wCSSF", "wCASF",
13250 "wC4", "wC5", "wC6", "wC7",
13251 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13252 "wC12", "wC13", "wC14", "wC15"
13255 fputs (wc_reg_names [INTVAL (x)], stream);
13259 /* Print a VFP/Neon double precision or quad precision register name. */
13263 int mode = GET_MODE (x);
13264 int is_quad = (code == 'q');
13267 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13269 output_operand_lossage ("invalid operand for code '%c'", code);
13273 if (GET_CODE (x) != REG
13274 || !IS_VFP_REGNUM (REGNO (x)))
13276 output_operand_lossage ("invalid operand for code '%c'", code);
13281 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13282 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13284 output_operand_lossage ("invalid operand for code '%c'", code);
13288 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13289 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13293 /* These two codes print the low/high doubleword register of a Neon quad
13294 register, respectively. For pair-structure types, can also print
13295 low/high quadword registers. */
13299 int mode = GET_MODE (x);
13302 if ((GET_MODE_SIZE (mode) != 16
13303 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13305 output_operand_lossage ("invalid operand for code '%c'", code);
13310 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13312 output_operand_lossage ("invalid operand for code '%c'", code);
13316 if (GET_MODE_SIZE (mode) == 16)
13317 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13318 + (code == 'f' ? 1 : 0));
13320 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13321 + (code == 'f' ? 1 : 0));
13325 /* Print a VFPv3 floating-point constant, represented as an integer
13329 int index = vfp3_const_double_index (x);
13330 gcc_assert (index != -1);
13331 fprintf (stream, "%d", index);
13335 /* Print bits representing opcode features for Neon.
13337 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13338 and polynomials as unsigned.
13340 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13342 Bit 2 is 1 for rounding functions, 0 otherwise. */
13344 /* Identify the type as 's', 'u', 'p' or 'f'. */
13347 HOST_WIDE_INT bits = INTVAL (x);
13348 fputc ("uspf"[bits & 3], stream);
13352 /* Likewise, but signed and unsigned integers are both 'i'. */
13355 HOST_WIDE_INT bits = INTVAL (x);
13356 fputc ("iipf"[bits & 3], stream);
13360 /* As for 'T', but emit 'u' instead of 'p'. */
13363 HOST_WIDE_INT bits = INTVAL (x);
13364 fputc ("usuf"[bits & 3], stream);
13368 /* Bit 2: rounding (vs none). */
13371 HOST_WIDE_INT bits = INTVAL (x);
13372 fputs ((bits & 4) != 0 ? "r" : "", stream);
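/* Example: a const_int operand of 6 (binary 110: polynomial type with
   rounding) prints "p" for 'T' and 'F', "u" for 't' and "r" for 'O';
   an operand of 1 (plain signed integer) prints "s", "i", "s" and ""
   respectively.  */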
13379 output_operand_lossage ("missing operand");
13383 switch (GET_CODE (x))
13386 asm_fprintf (stream, "%r", REGNO (x));
13390 output_memory_reference_mode = GET_MODE (x);
13391 output_address (XEXP (x, 0));
13398 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13399 sizeof (fpstr), 0, 1);
13400 fprintf (stream, "#%s", fpstr);
13403 fprintf (stream, "#%s", fp_immediate_constant (x));
13407 gcc_assert (GET_CODE (x) != NEG);
13408 fputc ('#', stream);
13409 output_addr_const (stream, x);
13415 /* Target hook for assembling integer objects. The ARM version needs to
13416 handle word-sized values specially. */
13418 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13420 enum machine_mode mode;
13422 if (size == UNITS_PER_WORD && aligned_p)
13424 fputs ("\t.word\t", asm_out_file);
13425 output_addr_const (asm_out_file, x);
13427 /* Mark symbols as position independent. We only do this in the
13428 .text segment, not in the .data segment. */
13429 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13430 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13432 /* See legitimize_pic_address for an explanation of the
13433 TARGET_VXWORKS_RTP check. */
13434 if (TARGET_VXWORKS_RTP
13435 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13436 fputs ("(GOT)", asm_out_file);
13438 fputs ("(GOTOFF)", asm_out_file);
13440 fputc ('\n', asm_out_file);
13444 mode = GET_MODE (x);
13446 if (arm_vector_mode_supported_p (mode))
13450 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13452 units = CONST_VECTOR_NUNITS (x);
13453 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13455 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13456 for (i = 0; i < units; i++)
13458 rtx elt = CONST_VECTOR_ELT (x, i);
13459 assemble_integer
13460 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13463 for (i = 0; i < units; i++)
13465 rtx elt = CONST_VECTOR_ELT (x, i);
13466 REAL_VALUE_TYPE rval;
13468 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13470 assemble_real
13471 (rval, GET_MODE_INNER (mode),
13472 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13478 return default_assemble_integer (x, size, aligned_p);
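/* Illustration: in a PIC constant table this path emits entries such as

	.word	foo(GOT)	@ global symbol (or any symbol on VxWorks RTP)
	.word	bar(GOTOFF)	@ symbol local to this module
*/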
13482 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13486 if (!TARGET_AAPCS_BASED)
13489 default_named_section_asm_out_constructor
13490 : default_named_section_asm_out_destructor) (symbol, priority);
13494 /* Put these in the .init_array section, using a special relocation. */
13495 if (priority != DEFAULT_INIT_PRIORITY)
13498 sprintf (buf, "%s.%.5u",
13499 is_ctor ? ".init_array" : ".fini_array",
13500 priority);
13501 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13508 switch_to_section (s);
13509 assemble_align (POINTER_SIZE);
13510 fputs ("\t.word\t", asm_out_file);
13511 output_addr_const (asm_out_file, symbol);
13512 fputs ("(target1)\n", asm_out_file);
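/* E.g. a constructor with priority 65 is placed in section
   ".init_array.00065" and emitted as

	.word	ctor_fn(target1)
*/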
13515 /* Add a function to the list of static constructors. */
13518 arm_elf_asm_constructor (rtx symbol, int priority)
13520 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13523 /* Add a function to the list of static destructors. */
13526 arm_elf_asm_destructor (rtx symbol, int priority)
13528 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13531 /* A finite state machine takes care of noticing whether or not instructions
13532 can be conditionally executed, and thus decrease execution time and code
13533 size by deleting branch instructions. The fsm is controlled by
13534 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13536 /* The states of the fsm controlling condition codes are:
13537 0: normal, do nothing special
13538 1: make ASM_OUTPUT_OPCODE not output this instruction
13539 2: make ASM_OUTPUT_OPCODE not output this instruction
13540 3: make instructions conditional
13541 4: make instructions conditional
13543 State transitions (state->state by whom under condition):
13544 0 -> 1 final_prescan_insn if the `target' is a label
13545 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13546 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13547 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13548 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13549 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13550 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13551 (the target insn is arm_target_insn).
13553 If the jump clobbers the conditions then we use states 2 and 4.
13555 A similar thing can be done with conditional return insns.
13557 XXX In case the `target' is an unconditional branch, this conditionalising
13558 of the instructions always reduces code size, but not always execution
13559 time. But then, I want to reduce the code size to somewhere near what
13560 /bin/cc produces. */
13562 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13563 instructions. When a COND_EXEC instruction is seen the subsequent
13564 instructions are scanned so that multiple conditional instructions can be
13565 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13566 specify the length and true/false mask for the IT block. These will be
13567 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
13569 /* Returns the index of the ARM condition code string in
13570 `arm_condition_codes'. COMPARISON should be an rtx like
13571 `(eq (...) (...))'. */
13572 static enum arm_cond_code
13573 get_arm_condition_code (rtx comparison)
13575 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13577 enum rtx_code comp_code = GET_CODE (comparison);
13579 if (GET_MODE_CLASS (mode) != MODE_CC)
13580 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13581 XEXP (comparison, 1));
13585 case CC_DNEmode: code = ARM_NE; goto dominance;
13586 case CC_DEQmode: code = ARM_EQ; goto dominance;
13587 case CC_DGEmode: code = ARM_GE; goto dominance;
13588 case CC_DGTmode: code = ARM_GT; goto dominance;
13589 case CC_DLEmode: code = ARM_LE; goto dominance;
13590 case CC_DLTmode: code = ARM_LT; goto dominance;
13591 case CC_DGEUmode: code = ARM_CS; goto dominance;
13592 case CC_DGTUmode: code = ARM_HI; goto dominance;
13593 case CC_DLEUmode: code = ARM_LS; goto dominance;
13594 case CC_DLTUmode: code = ARM_CC;
13597 gcc_assert (comp_code == EQ || comp_code == NE);
13599 if (comp_code == EQ)
13600 return ARM_INVERSE_CONDITION_CODE (code);
13606 case NE: return ARM_NE;
13607 case EQ: return ARM_EQ;
13608 case GE: return ARM_PL;
13609 case LT: return ARM_MI;
13610 default: gcc_unreachable ();
13616 case NE: return ARM_NE;
13617 case EQ: return ARM_EQ;
13618 default: gcc_unreachable ();
13624 case NE: return ARM_MI;
13625 case EQ: return ARM_PL;
13626 default: gcc_unreachable ();
13631 /* These encodings assume that AC=1 in the FPA system control
13632 byte.  This allows us to handle all cases except UNEQ and
13633 LTGT.  */
13636 case GE: return ARM_GE;
13637 case GT: return ARM_GT;
13638 case LE: return ARM_LS;
13639 case LT: return ARM_MI;
13640 case NE: return ARM_NE;
13641 case EQ: return ARM_EQ;
13642 case ORDERED: return ARM_VC;
13643 case UNORDERED: return ARM_VS;
13644 case UNLT: return ARM_LT;
13645 case UNLE: return ARM_LE;
13646 case UNGT: return ARM_HI;
13647 case UNGE: return ARM_PL;
13648 /* UNEQ and LTGT do not have a representation. */
13649 case UNEQ: /* Fall through. */
13650 case LTGT: /* Fall through. */
13651 default: gcc_unreachable ();
13657 case NE: return ARM_NE;
13658 case EQ: return ARM_EQ;
13659 case GE: return ARM_LE;
13660 case GT: return ARM_LT;
13661 case LE: return ARM_GE;
13662 case LT: return ARM_GT;
13663 case GEU: return ARM_LS;
13664 case GTU: return ARM_CC;
13665 case LEU: return ARM_CS;
13666 case LTU: return ARM_HI;
13667 default: gcc_unreachable ();
13673 case LTU: return ARM_CS;
13674 case GEU: return ARM_CC;
13675 default: gcc_unreachable ();
13681 case NE: return ARM_NE;
13682 case EQ: return ARM_EQ;
13683 case GE: return ARM_GE;
13684 case GT: return ARM_GT;
13685 case LE: return ARM_LE;
13686 case LT: return ARM_LT;
13687 case GEU: return ARM_CS;
13688 case GTU: return ARM_HI;
13689 case LEU: return ARM_LS;
13690 case LTU: return ARM_CC;
13691 default: gcc_unreachable ();
13694 default: gcc_unreachable ();
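/* Example mappings (added comment): for plain CCmode,
   (ge (reg:CC CC_REGNUM) (const_int 0)) yields ARM_GE from the last
   block above, while the swapped-operand mode (CC_SWPmode in the full
   switch, whose case labels are elided here) maps GE to ARM_LE
   instead, and the dominance modes at the top fold two comparisons
   into one condition via the `dominance' path.  */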
13698 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13699 instructions.  */
13701 thumb2_final_prescan_insn (rtx insn)
13703 rtx first_insn = insn;
13704 rtx body = PATTERN (insn);
13706 enum arm_cond_code code;
13710 /* Remove the previous insn from the count of insns to be output. */
13711 if (arm_condexec_count)
13712 arm_condexec_count--;
13714 /* Nothing to do if we are already inside a conditional block. */
13715 if (arm_condexec_count)
13718 if (GET_CODE (body) != COND_EXEC)
13721 /* Conditional jumps are implemented directly. */
13722 if (GET_CODE (insn) == JUMP_INSN)
13725 predicate = COND_EXEC_TEST (body);
13726 arm_current_cc = get_arm_condition_code (predicate);
13728 n = get_attr_ce_count (insn);
13729 arm_condexec_count = 1;
13730 arm_condexec_mask = (1 << n) - 1;
13731 arm_condexec_masklen = n;
13732 /* See if subsequent instructions can be combined into the same block. */
13735 insn = next_nonnote_insn (insn);
13737 /* Jumping into the middle of an IT block is illegal, so a label or
13738 barrier terminates the block. */
13739 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13742 body = PATTERN (insn);
13743 /* USE and CLOBBER aren't really insns, so just skip them. */
13744 if (GET_CODE (body) == USE
13745 || GET_CODE (body) == CLOBBER)
13748 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13749 if (GET_CODE (body) != COND_EXEC)
13751 /* Allow up to 4 conditionally executed instructions in a block. */
13752 n = get_attr_ce_count (insn);
13753 if (arm_condexec_masklen + n > 4)
13756 predicate = COND_EXEC_TEST (body);
13757 code = get_arm_condition_code (predicate);
13758 mask = (1 << n) - 1;
13759 if (arm_current_cc == code)
13760 arm_condexec_mask |= (mask << arm_condexec_masklen);
13761 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13764 arm_condexec_count++;
13765 arm_condexec_masklen += n;
13767 /* A jump must be the last instruction in a conditional block. */
13768 if (GET_CODE(insn) == JUMP_INSN)
13771 /* Restore recog_data (getting the attributes of other insns can
13772 destroy this array, but final.c assumes that it remains intact
13773 across this call). */
13774 extract_constrain_insn_cached (first_insn);
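/* Sketch of the bookkeeping (added for illustration): given three
   consecutive COND_EXEC insns predicated eq, ne, eq, the loop above
   leaves arm_condexec_count = 3, arm_condexec_masklen = 3 and
   arm_condexec_mask = 5 (binary 101, one bit per insn, set when the
   insn uses arm_current_cc rather than its inverse), ready for
   thumb2_asm_output_opcode to print the IT prefix.  */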
13778 arm_final_prescan_insn (rtx insn)
13780 /* BODY will hold the body of INSN. */
13781 rtx body = PATTERN (insn);
13783 /* This will be 1 if trying to repeat the trick, and things need to be
13784 reversed if it appears to fail. */
13787 /* If JUMP_CLOBBERS is one, the condition codes will be clobbered if the
13788 branch is taken, even if the rtl suggests otherwise.  It also
13789 means that we have to grub around within the jump expression to find
13790 out what the conditions are when the jump isn't taken. */
13791 int jump_clobbers = 0;
13793 /* If we start with a return insn, we only succeed if we find another one. */
13794 int seeking_return = 0;
13796 /* START_INSN will hold the insn from where we start looking. This is the
13797 first insn after the following code_label if REVERSE is true. */
13798 rtx start_insn = insn;
13800 /* If in state 4, check if the target branch is reached, in order to
13801 change back to state 0. */
13802 if (arm_ccfsm_state == 4)
13804 if (insn == arm_target_insn)
13806 arm_target_insn = NULL;
13807 arm_ccfsm_state = 0;
13812 /* If in state 3, it is possible to repeat the trick, if this insn is an
13813 unconditional branch to a label, and immediately following this branch
13814 is the previous target label which is only used once, and the label this
13815 branch jumps to is not too far off. */
13816 if (arm_ccfsm_state == 3)
13818 if (simplejump_p (insn))
13820 start_insn = next_nonnote_insn (start_insn);
13821 if (GET_CODE (start_insn) == BARRIER)
13823 /* XXX Isn't this always a barrier? */
13824 start_insn = next_nonnote_insn (start_insn);
13826 if (GET_CODE (start_insn) == CODE_LABEL
13827 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13828 && LABEL_NUSES (start_insn) == 1)
13833 else if (GET_CODE (body) == RETURN)
13835 start_insn = next_nonnote_insn (start_insn);
13836 if (GET_CODE (start_insn) == BARRIER)
13837 start_insn = next_nonnote_insn (start_insn);
13838 if (GET_CODE (start_insn) == CODE_LABEL
13839 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13840 && LABEL_NUSES (start_insn) == 1)
13843 seeking_return = 1;
13852 gcc_assert (!arm_ccfsm_state || reverse);
13853 if (GET_CODE (insn) != JUMP_INSN)
13856 /* This jump might be paralleled with a clobber of the condition codes;
13857 the jump should always come first.  */
13858 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13859 body = XVECEXP (body, 0, 0);
13862 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13863 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13866 int fail = FALSE, succeed = FALSE;
13867 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13868 int then_not_else = TRUE;
13869 rtx this_insn = start_insn, label = 0;
13871 /* If the jump cannot be done with one instruction, we cannot
13872 conditionally execute the instruction in the inverse case. */
13873 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13879 /* Register the insn jumped to. */
13882 if (!seeking_return)
13883 label = XEXP (SET_SRC (body), 0);
13885 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13886 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13887 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13889 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13890 then_not_else = FALSE;
13892 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13893 seeking_return = 1;
13894 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13896 seeking_return = 1;
13897 then_not_else = FALSE;
13900 gcc_unreachable ();
13902 /* See how many insns this branch skips, and what kind of insns. If all
13903 insns are okay, and the label or unconditional branch to the same
13904 label is not too far away, succeed. */
13905 for (insns_skipped = 0;
13906 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13910 this_insn = next_nonnote_insn (this_insn);
13914 switch (GET_CODE (this_insn))
13917 /* Succeed if it is the target label, otherwise fail since
13918 control falls in from somewhere else. */
13919 if (this_insn == label)
13923 arm_ccfsm_state = 2;
13924 this_insn = next_nonnote_insn (this_insn);
13927 arm_ccfsm_state = 1;
13935 /* Succeed if the following insn is the target label.
13937 If return insns are used then the last insn in a function
13938 will be a barrier. */
13939 this_insn = next_nonnote_insn (this_insn);
13940 if (this_insn && this_insn == label)
13944 arm_ccfsm_state = 2;
13945 this_insn = next_nonnote_insn (this_insn);
13948 arm_ccfsm_state = 1;
13956 /* The AAPCS says that conditional calls should not be
13957 used since they make interworking inefficient (the
13958 linker can't transform BL<cond> into BLX). That's
13959 only a problem if the machine has BLX. */
13966 /* Succeed if the following insn is the target label, or
13967 if the following two insns are a barrier and the
13968 target label.  */
13969 this_insn = next_nonnote_insn (this_insn);
13970 if (this_insn && GET_CODE (this_insn) == BARRIER)
13971 this_insn = next_nonnote_insn (this_insn);
13973 if (this_insn && this_insn == label
13974 && insns_skipped < max_insns_skipped)
13978 arm_ccfsm_state = 2;
13979 this_insn = next_nonnote_insn (this_insn);
13982 arm_ccfsm_state = 1;
13990 /* If this is an unconditional branch to the same label, succeed.
13991 If it is to another label, do nothing.  If it is conditional,
13992 fail.  */
13993 /* XXX Probably, the tests for SET and the PC are
13994 unnecessary.  */
13996 scanbody = PATTERN (this_insn);
13997 if (GET_CODE (scanbody) == SET
13998 && GET_CODE (SET_DEST (scanbody)) == PC)
14000 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
14001 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
14003 arm_ccfsm_state = 2;
14006 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
14009 /* Fail if a conditional return is undesirable (e.g. on a
14010 StrongARM), but still allow this if optimizing for size. */
14011 else if (GET_CODE (scanbody) == RETURN
14012 && !use_return_insn (TRUE, NULL)
14015 else if (GET_CODE (scanbody) == RETURN
14018 arm_ccfsm_state = 2;
14021 else if (GET_CODE (scanbody) == PARALLEL)
14023 switch (get_attr_conds (this_insn))
14033 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
14038 /* Instructions using or affecting the condition codes make it
14039 fail.  */
14040 scanbody = PATTERN (this_insn);
14041 if (!(GET_CODE (scanbody) == SET
14042 || GET_CODE (scanbody) == PARALLEL)
14043 || get_attr_conds (this_insn) != CONDS_NOCOND)
14046 /* A conditional cirrus instruction must be followed by
14047 a non-Cirrus instruction.  However, since we
14048 conditionalize instructions in this function and by
14049 the time we get here we can't add instructions
14050 (nops), because shorten_branches() has already been
14051 called, we will disable conditionalizing Cirrus
14052 instructions to be safe. */
14053 if (GET_CODE (scanbody) != USE
14054 && GET_CODE (scanbody) != CLOBBER
14055 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
14065 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
14066 arm_target_label = CODE_LABEL_NUMBER (label);
14069 gcc_assert (seeking_return || arm_ccfsm_state == 2);
14071 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
14073 this_insn = next_nonnote_insn (this_insn);
14074 gcc_assert (!this_insn
14075 || (GET_CODE (this_insn) != BARRIER
14076 && GET_CODE (this_insn) != CODE_LABEL));
14080 /* Oh, dear!  We ran off the end... give up.  */
14081 extract_constrain_insn_cached (insn);
14082 arm_ccfsm_state = 0;
14083 arm_target_insn = NULL;
14086 arm_target_insn = this_insn;
14090 gcc_assert (!reverse);
14092 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
14094 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
14095 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14096 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
14097 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14101 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
14102 what it was.  */
14104 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
14108 if (reverse || then_not_else)
14109 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14112 /* Restore recog_data (getting the attributes of other insns can
14113 destroy this array, but final.c assumes that it remains intact
14114 across this call).  */
14115 extract_constrain_insn_cached (insn);
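/* An end-to-end illustration (added; not in the original): a diamond
   such as

       beq     .L1
       add     r0, r0, #1
       b       .L2
   .L1:
       sub     r0, r0, #1
   .L2:

   is first conditionalised over the beq (states 1/3); when state 3
   reaches the unconditional branch followed by the once-used .L1,
   REVERSE lets the trick repeat with the inverted condition, so the
   whole thing can collapse to

       addne   r0, r0, #1
       subeq   r0, r0, #1  */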
14119 /* Output IT instructions. */
14121 thumb2_asm_output_opcode (FILE * stream)
14126 if (arm_condexec_mask)
14128 for (n = 0; n < arm_condexec_masklen; n++)
14129 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14131 asm_fprintf(stream, "i%s\t%s\n\t", buff,
14132 arm_condition_codes[arm_current_cc]);
14133 arm_condexec_mask = 0;
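/* For instance (illustrative): with arm_condexec_masklen == 3,
   arm_condexec_mask == 5 (binary 101) and arm_current_cc == ARM_EQ,
   buff becomes "tet" and the code above prints

       itet    eq

   before the first instruction of the block, matching the
   then/else/then pattern of the three predicated insns.  */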
14137 /* Returns true if REGNO is a valid register
14138 for holding a quantity of type MODE. */
14140 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14142 if (GET_MODE_CLASS (mode) == MODE_CC)
14143 return (regno == CC_REGNUM
14144 || (TARGET_HARD_FLOAT && TARGET_VFP
14145 && regno == VFPCC_REGNUM));
14148 /* For the Thumb we only allow values bigger than SImode in
14149 registers 0 - 6, so that there is always a second low
14150 register available to hold the upper part of the value.
14151 We probably ought to ensure that the register is the
14152 start of an even numbered register pair. */
14153 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14155 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14156 && IS_CIRRUS_REGNUM (regno))
14157 /* We have outlawed SI values in Cirrus registers because they
14158 reside in the lower 32 bits, but SF values reside in the
14159 upper 32 bits. This causes gcc all sorts of grief. We can't
14160 even split the registers into pairs because Cirrus SI values
14161 get sign-extended to 64 bits -- aldyh.  */
14162 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14164 if (TARGET_HARD_FLOAT && TARGET_VFP
14165 && IS_VFP_REGNUM (regno))
14167 if (mode == SFmode || mode == SImode)
14168 return VFP_REGNO_OK_FOR_SINGLE (regno);
14170 if (mode == DFmode)
14171 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14174 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14175 || (VALID_NEON_QREG_MODE (mode)
14176 && NEON_REGNO_OK_FOR_QUAD (regno))
14177 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14178 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14179 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14180 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14181 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14186 if (TARGET_REALLY_IWMMXT)
14188 if (IS_IWMMXT_GR_REGNUM (regno))
14189 return mode == SImode;
14191 if (IS_IWMMXT_REGNUM (regno))
14192 return VALID_IWMMXT_REG_MODE (mode);
14195 /* We allow any value to be stored in the general registers.
14196 Restrict doubleword quantities to even register pairs so that we can
14197 use ldrd. Do not allow Neon structure opaque modes in general registers;
14198 they would use too many. */
14199 if (regno <= LAST_ARM_REGNUM)
14200 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14201 && !VALID_NEON_STRUCT_MODE (mode);
14203 if (regno == FRAME_POINTER_REGNUM
14204 || regno == ARG_POINTER_REGNUM)
14205 /* We only allow integers in the fake hard registers. */
14206 return GET_MODE_CLASS (mode) == MODE_INT;
14208 /* The only registers left are the FPA registers
14209 which we only allow to hold FP values. */
14210 return (TARGET_HARD_FLOAT && TARGET_FPA
14211 && GET_MODE_CLASS (mode) == MODE_FLOAT
14212 && regno >= FIRST_FPA_REGNUM
14213 && regno <= LAST_FPA_REGNUM);
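/* A few concrete consequences of the rules above (added comment):
   with TARGET_LDRD a DImode value is rejected in r1 (odd) but
   accepted in r2, keeping doubleword values ldrd/strd-compatible;
   SImode fits in any VFP single-precision register that
   VFP_REGNO_OK_FOR_SINGLE accepts; and a Neon TImode quantity needs a
   register for which NEON_REGNO_OK_FOR_NREGS (regno, 2) holds.  */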
14216 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14217 not used in arm mode. */
14219 arm_regno_class (int regno)
14223 if (regno == STACK_POINTER_REGNUM)
14225 if (regno == CC_REGNUM)
14232 if (TARGET_THUMB2 && regno < 8)
14235 if ( regno <= LAST_ARM_REGNUM
14236 || regno == FRAME_POINTER_REGNUM
14237 || regno == ARG_POINTER_REGNUM)
14238 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14240 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14241 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14243 if (IS_CIRRUS_REGNUM (regno))
14244 return CIRRUS_REGS;
14246 if (IS_VFP_REGNUM (regno))
14248 if (regno <= D7_VFP_REGNUM)
14249 return VFP_D0_D7_REGS;
14250 else if (regno <= LAST_LO_VFP_REGNUM)
14251 return VFP_LO_REGS;
14253 return VFP_HI_REGS;
14256 if (IS_IWMMXT_REGNUM (regno))
14257 return IWMMXT_REGS;
14259 if (IS_IWMMXT_GR_REGNUM (regno))
14260 return IWMMXT_GR_REGS;
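/* Examples (illustrative): in Thumb-2, r5 classifies as LO_REGS and
   r10 as HI_REGS, while in ARM state both are simply GENERAL_REGS;
   d0 falls in VFP_D0_D7_REGS and d20 in VFP_HI_REGS regardless of
   instruction set state.  */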
14265 /* Handle a special case when computing the offset
14266 of an argument from the frame pointer. */
14268 arm_debugger_arg_offset (int value, rtx addr)
14272 /* We are only interested if dbxout_parms() failed to compute the offset. */
14276 /* We can only cope with the case where the address is held in a register. */
14277 if (GET_CODE (addr) != REG)
14280 /* If we are using the frame pointer to point at the argument, then
14281 an offset of 0 is correct. */
14282 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14285 /* If we are using the stack pointer to point at the
14286 argument, then an offset of 0 is correct. */
14287 /* ??? Check this is consistent with thumb2 frame layout. */
14288 if ((TARGET_THUMB || !frame_pointer_needed)
14289 && REGNO (addr) == SP_REGNUM)
14292 /* Oh dear. The argument is pointed to by a register rather
14293 than being held in a register, or being stored at a known
14294 offset from the frame pointer. Since GDB only understands
14295 those two kinds of argument we must translate the address
14296 held in the register into an offset from the frame pointer.
14297 We do this by searching through the insns for the function
14298 looking to see where this register gets its value. If the
14299 register is initialized from the frame pointer plus an offset
14300 then we are in luck and we can continue, otherwise we give up.
14302 This code is exercised by producing debugging information
14303 for a function with arguments like this:
14305 double func (double a, double b, int c, double d) {return d;}
14307 Without this code the stab for parameter 'd' will be set to
14308 an offset of 0 from the frame pointer, rather than 8. */
14310 /* The if() statement says:
14312 If the insn is a normal instruction
14313 and if the insn is setting the value in a register
14314 and if the register being set is the register holding the address of the argument
14315 and if the address is computed by an addition
14316 that involves adding to a register
14317 which is the frame pointer
14318 a constant integer
14320 then...  */
14322 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14324 if ( GET_CODE (insn) == INSN
14325 && GET_CODE (PATTERN (insn)) == SET
14326 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14327 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14328 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14329 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14330 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14333 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14342 warning (0, "unable to compute real location of stacked parameter");
14343 value = 8; /* XXX magic hack */
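/* Concretely (added note), the scan above looks for an insn of the
   shape

       (set (reg Rn) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int K)))

   and, when one is found, reports the argument as living at offset K
   from the frame pointer (8 for parameter 'd' in the example quoted
   earlier).  */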
14349 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14352 if ((MASK) & insn_flags) \
14353 add_builtin_function ((NAME), (TYPE), (CODE), \
14354 BUILT_IN_MD, NULL, NULL_TREE); \
14358 struct builtin_description
14360 const unsigned int mask;
14361 const enum insn_code icode;
14362 const char * const name;
14363 const enum arm_builtins code;
14364 const enum rtx_code comparison;
14365 const unsigned int flag;
14368 static const struct builtin_description bdesc_2arg[] =
14370 #define IWMMXT_BUILTIN(code, string, builtin) \
14371 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14372 ARM_BUILTIN_##builtin, 0, 0 },
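/* As an illustration (added comment), the first entry below expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, 0, 0 },

   pairing each builtin name with the insn pattern that implements
   it.  */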
14374 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14375 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14376 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14377 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14378 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14379 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14380 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14381 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14382 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14383 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14384 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14385 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14386 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14387 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14388 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14389 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14390 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14391 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14392 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14393 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14394 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14395 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14396 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14397 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14398 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14399 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14400 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14401 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14402 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14403 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14404 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14405 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14406 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14407 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14408 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14409 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14410 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14411 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14412 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14413 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14414 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14415 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14416 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14417 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14418 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14419 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14420 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14421 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14422 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14423 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14424 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14425 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14426 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14427 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14428 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14429 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14430 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14431 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14433 #define IWMMXT_BUILTIN2(code, builtin) \
14434 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14436 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14437 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14438 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14439 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14440 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14441 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14442 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14443 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14444 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14445 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14446 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14447 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14448 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14449 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14450 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14451 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14452 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14453 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14454 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14455 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14456 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14457 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14458 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14459 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14460 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14461 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14462 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14463 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14464 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14465 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14466 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14467 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14470 static const struct builtin_description bdesc_1arg[] =
14472 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14473 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14474 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14475 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14476 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14477 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14478 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14479 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14480 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14481 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14482 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14483 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14484 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14485 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14486 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14487 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14488 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14489 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14492 /* Set up all the iWMMXt builtins. This is
14493 not called if TARGET_IWMMXT is zero. */
14496 arm_init_iwmmxt_builtins (void)
14498 const struct builtin_description * d;
14500 tree endlink = void_list_node;
14502 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14503 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14504 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14507 = build_function_type (integer_type_node,
14508 tree_cons (NULL_TREE, integer_type_node, endlink));
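/* Reading these initializers (explanatory note added): each
   build_function_type call chains its argument types through
   tree_cons and terminates the list with endlink, so the declaration
   just above describes the C prototype

       int f (int);

   and the longer chains below follow the same pattern with more
   parameters.  */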
14509 tree v8qi_ftype_v8qi_v8qi_int
14510 = build_function_type (V8QI_type_node,
14511 tree_cons (NULL_TREE, V8QI_type_node,
14512 tree_cons (NULL_TREE, V8QI_type_node,
14513 tree_cons (NULL_TREE,
14516 tree v4hi_ftype_v4hi_int
14517 = build_function_type (V4HI_type_node,
14518 tree_cons (NULL_TREE, V4HI_type_node,
14519 tree_cons (NULL_TREE, integer_type_node,
14521 tree v2si_ftype_v2si_int
14522 = build_function_type (V2SI_type_node,
14523 tree_cons (NULL_TREE, V2SI_type_node,
14524 tree_cons (NULL_TREE, integer_type_node,
14526 tree v2si_ftype_di_di
14527 = build_function_type (V2SI_type_node,
14528 tree_cons (NULL_TREE, long_long_integer_type_node,
14529 tree_cons (NULL_TREE, long_long_integer_type_node,
14531 tree di_ftype_di_int
14532 = build_function_type (long_long_integer_type_node,
14533 tree_cons (NULL_TREE, long_long_integer_type_node,
14534 tree_cons (NULL_TREE, integer_type_node,
14536 tree di_ftype_di_int_int
14537 = build_function_type (long_long_integer_type_node,
14538 tree_cons (NULL_TREE, long_long_integer_type_node,
14539 tree_cons (NULL_TREE, integer_type_node,
14540 tree_cons (NULL_TREE,
14543 tree int_ftype_v8qi
14544 = build_function_type (integer_type_node,
14545 tree_cons (NULL_TREE, V8QI_type_node,
14547 tree int_ftype_v4hi
14548 = build_function_type (integer_type_node,
14549 tree_cons (NULL_TREE, V4HI_type_node,
14551 tree int_ftype_v2si
14552 = build_function_type (integer_type_node,
14553 tree_cons (NULL_TREE, V2SI_type_node,
14555 tree int_ftype_v8qi_int
14556 = build_function_type (integer_type_node,
14557 tree_cons (NULL_TREE, V8QI_type_node,
14558 tree_cons (NULL_TREE, integer_type_node,
14560 tree int_ftype_v4hi_int
14561 = build_function_type (integer_type_node,
14562 tree_cons (NULL_TREE, V4HI_type_node,
14563 tree_cons (NULL_TREE, integer_type_node,
14565 tree int_ftype_v2si_int
14566 = build_function_type (integer_type_node,
14567 tree_cons (NULL_TREE, V2SI_type_node,
14568 tree_cons (NULL_TREE, integer_type_node,
14570 tree v8qi_ftype_v8qi_int_int
14571 = build_function_type (V8QI_type_node,
14572 tree_cons (NULL_TREE, V8QI_type_node,
14573 tree_cons (NULL_TREE, integer_type_node,
14574 tree_cons (NULL_TREE,
14577 tree v4hi_ftype_v4hi_int_int
14578 = build_function_type (V4HI_type_node,
14579 tree_cons (NULL_TREE, V4HI_type_node,
14580 tree_cons (NULL_TREE, integer_type_node,
14581 tree_cons (NULL_TREE,
14584 tree v2si_ftype_v2si_int_int
14585 = build_function_type (V2SI_type_node,
14586 tree_cons (NULL_TREE, V2SI_type_node,
14587 tree_cons (NULL_TREE, integer_type_node,
14588 tree_cons (NULL_TREE,
14591 /* Miscellaneous. */
14592 tree v8qi_ftype_v4hi_v4hi
14593 = build_function_type (V8QI_type_node,
14594 tree_cons (NULL_TREE, V4HI_type_node,
14595 tree_cons (NULL_TREE, V4HI_type_node,
14597 tree v4hi_ftype_v2si_v2si
14598 = build_function_type (V4HI_type_node,
14599 tree_cons (NULL_TREE, V2SI_type_node,
14600 tree_cons (NULL_TREE, V2SI_type_node,
14602 tree v2si_ftype_v4hi_v4hi
14603 = build_function_type (V2SI_type_node,
14604 tree_cons (NULL_TREE, V4HI_type_node,
14605 tree_cons (NULL_TREE, V4HI_type_node,
14607 tree v2si_ftype_v8qi_v8qi
14608 = build_function_type (V2SI_type_node,
14609 tree_cons (NULL_TREE, V8QI_type_node,
14610 tree_cons (NULL_TREE, V8QI_type_node,
14612 tree v4hi_ftype_v4hi_di
14613 = build_function_type (V4HI_type_node,
14614 tree_cons (NULL_TREE, V4HI_type_node,
14615 tree_cons (NULL_TREE,
14616 long_long_integer_type_node,
14618 tree v2si_ftype_v2si_di
14619 = build_function_type (V2SI_type_node,
14620 tree_cons (NULL_TREE, V2SI_type_node,
14621 tree_cons (NULL_TREE,
14622 long_long_integer_type_node,
14624 tree void_ftype_int_int
14625 = build_function_type (void_type_node,
14626 tree_cons (NULL_TREE, integer_type_node,
14627 tree_cons (NULL_TREE, integer_type_node,
14630 = build_function_type (long_long_unsigned_type_node, endlink);
14632 = build_function_type (long_long_integer_type_node,
14633 tree_cons (NULL_TREE, V8QI_type_node,
14636 = build_function_type (long_long_integer_type_node,
14637 tree_cons (NULL_TREE, V4HI_type_node,
14640 = build_function_type (long_long_integer_type_node,
14641 tree_cons (NULL_TREE, V2SI_type_node,
14643 tree v2si_ftype_v4hi
14644 = build_function_type (V2SI_type_node,
14645 tree_cons (NULL_TREE, V4HI_type_node,
14647 tree v4hi_ftype_v8qi
14648 = build_function_type (V4HI_type_node,
14649 tree_cons (NULL_TREE, V8QI_type_node,
14652 tree di_ftype_di_v4hi_v4hi
14653 = build_function_type (long_long_unsigned_type_node,
14654 tree_cons (NULL_TREE,
14655 long_long_unsigned_type_node,
14656 tree_cons (NULL_TREE, V4HI_type_node,
14657 tree_cons (NULL_TREE,
14661 tree di_ftype_v4hi_v4hi
14662 = build_function_type (long_long_unsigned_type_node,
14663 tree_cons (NULL_TREE, V4HI_type_node,
14664 tree_cons (NULL_TREE, V4HI_type_node,
14667 /* Normal vector binops. */
14668 tree v8qi_ftype_v8qi_v8qi
14669 = build_function_type (V8QI_type_node,
14670 tree_cons (NULL_TREE, V8QI_type_node,
14671 tree_cons (NULL_TREE, V8QI_type_node,
14673 tree v4hi_ftype_v4hi_v4hi
14674 = build_function_type (V4HI_type_node,
14675 tree_cons (NULL_TREE, V4HI_type_node,
14676 tree_cons (NULL_TREE, V4HI_type_node,
14678 tree v2si_ftype_v2si_v2si
14679 = build_function_type (V2SI_type_node,
14680 tree_cons (NULL_TREE, V2SI_type_node,
14681 tree_cons (NULL_TREE, V2SI_type_node,
14683 tree di_ftype_di_di
14684 = build_function_type (long_long_unsigned_type_node,
14685 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14686 tree_cons (NULL_TREE,
14687 long_long_unsigned_type_node,
14690 /* Add all builtins that are more or less simple operations on two
14691 operands.  */
14692 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14694 /* Use one of the operands; the target can have a different mode for
14695 mask-generating compares. */
14696 enum machine_mode mode;
14702 mode = insn_data[d->icode].operand[1].mode;
14707 type = v8qi_ftype_v8qi_v8qi;
14710 type = v4hi_ftype_v4hi_v4hi;
14713 type = v2si_ftype_v2si_v2si;
14716 type = di_ftype_di_di;
14720 gcc_unreachable ();
14723 def_mbuiltin (d->mask, d->name, type, d->code);
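/* Once registered, these builtins are directly usable from C when
   iWMMXt is enabled; a hypothetical usage sketch:

       typedef int v2si __attribute__ ((vector_size (8)));
       v2si add_pixels (v2si a, v2si b) { return __builtin_arm_waddw (a, b); }

   where __builtin_arm_waddw carries the v2si_ftype_v2si_v2si type
   built above.  */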
14726 /* Add the remaining MMX insns with somewhat more complicated types. */
14727 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14728 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14729 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14731 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14732 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14733 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14734 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14735 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14736 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14738 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14739 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14740 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14741 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14742 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14743 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14745 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14746 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14747 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14748 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14749 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14750 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14752 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14753 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14754 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14755 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14756 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14757 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14759 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14761 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14762 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14763 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14764 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14766 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14767 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14768 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14769 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14770 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14771 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14772 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14773 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14774 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14776 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14777 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14778 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14780 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14781 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14782 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14784 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14785 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14786 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14787 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14788 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14789 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14791 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14792 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14793 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14794 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14795 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14796 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14797 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14798 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14799 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14800 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14801 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14802 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14804 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14805 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14806 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14807 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14809 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14810 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14811 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14812 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14814 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14815 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14819 arm_init_tls_builtins (void)
14823 ftype = build_function_type (ptr_type_node, void_list_node);
14824 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
14825 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14827 TREE_NOTHROW (decl) = 1;
14828 TREE_READONLY (decl) = 1;
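/* Illustrative use (added; not in the original): user code can then
   fetch the TLS thread pointer directly, e.g.

       void *tp = __builtin_thread_pointer ();

   which the backend expands to a read of the thread register or a
   call to a helper such as __aeabi_read_tp, depending on how the
   target is configured.  */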
14845 } neon_builtin_type_bits;
14847 #define v8qi_UP T_V8QI
14848 #define v4hi_UP T_V4HI
14849 #define v2si_UP T_V2SI
14850 #define v2sf_UP T_V2SF
14852 #define v16qi_UP T_V16QI
14853 #define v8hi_UP T_V8HI
14854 #define v4si_UP T_V4SI
14855 #define v4sf_UP T_V4SF
14856 #define v2di_UP T_V2DI
14861 #define UP(X) X##_UP
14896 NEON_LOADSTRUCTLANE,
14898 NEON_STORESTRUCTLANE,
14907 const neon_itype itype;
14908 const neon_builtin_type_bits bits;
14909 const enum insn_code codes[T_MAX];
14910 const unsigned int num_vars;
14911 unsigned int base_fcode;
14912 } neon_builtin_datum;
14914 #define CF(N,X) CODE_FOR_neon_##N##X
14916 #define VAR1(T, N, A) \
14917 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14918 #define VAR2(T, N, A, B) \
14919 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14920 #define VAR3(T, N, A, B, C) \
14921 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14922 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14923 #define VAR4(T, N, A, B, C, D) \
14924 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14925 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14926 #define VAR5(T, N, A, B, C, D, E) \
14927 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14928 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14929 #define VAR6(T, N, A, B, C, D, E, F) \
14930 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14931 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14932 #define VAR7(T, N, A, B, C, D, E, F, G) \
14933 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14934 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14936 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14937 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14939 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14940 CF (N, G), CF (N, H) }, 8, 0
14941 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14942 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14943 | UP (H) | UP (I), \
14944 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14945 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14946 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14947 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14948 | UP (H) | UP (I) | UP (J), \
14949 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14950 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
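/* Expansion example (added comment): VAR2 (BINOP, vqdmull, v4hi, v2si)
   in the table below becomes

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. one builtin stem with two per-mode insn variants and a bitmask
   recording which type variants exist.  */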
14952 /* The mode entries in the following table correspond to the "key" type of the
14953 instruction variant, i.e. equivalent to that which would be specified after
14954 the assembler mnemonic, which usually refers to the last vector operand.
14955 (Signed/unsigned/polynomial types are not differentiated, though; they
14956 are all mapped onto the same mode for a given element size.)  The modes
14957 listed per instruction should be the same as those defined for that
14958 instruction's pattern in neon.md.
14959 WARNING: Variants should be listed in the same increasing order as
14960 neon_builtin_type_bits. */
14962 static neon_builtin_datum neon_builtin_data[] =
14964 { VAR10 (BINOP, vadd,
14965 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14966 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14967 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14968 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14969 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14970 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14971 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14972 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14973 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14974 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14975 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14976 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14977 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14978 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14979 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14980 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14981 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14982 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14983 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14984 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14985 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14986 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14987 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14988 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14989 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14990 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14991 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14992 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14993 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14994 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14995 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14996 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14997 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14998 { VAR10 (BINOP, vsub,
14999 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15000 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
15001 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
15002 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15003 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15004 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
15005 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15006 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15007 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15008 { VAR2 (BINOP, vcage, v2sf, v4sf) },
15009 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
15010 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15011 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15012 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
15013 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15014 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
15015 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15016 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15017 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
15018 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15019 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15020 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
15021 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
15022 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
15023 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
15024 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15025 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15026 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15027 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15028 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15029 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15030 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15031 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15032 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
15033 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
15034 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
15035 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15036 /* FIXME: vget_lane supports more variants than this! */
15037 { VAR10 (GETLANE, vget_lane,
15038 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15039 { VAR10 (SETLANE, vset_lane,
15040 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15041 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
15042 { VAR10 (DUP, vdup_n,
15043 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15044 { VAR10 (DUPLANE, vdup_lane,
15045 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15046 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
15047 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
15048 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
15049 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
15050 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
15051 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
15052 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
15053 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15054 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15055 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
15056 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
15057 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15058 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
15059 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
15060 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15061 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15062 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
15063 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
15064 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15065 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
15066 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
15067 { VAR10 (BINOP, vext,
15068 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15069 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15070 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
15071 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
15072 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
15073 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
15074 { VAR10 (SELECT, vbsl,
15075 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15076 { VAR1 (VTBL, vtbl1, v8qi) },
15077 { VAR1 (VTBL, vtbl2, v8qi) },
15078 { VAR1 (VTBL, vtbl3, v8qi) },
15079 { VAR1 (VTBL, vtbl4, v8qi) },
15080 { VAR1 (VTBX, vtbx1, v8qi) },
15081 { VAR1 (VTBX, vtbx2, v8qi) },
15082 { VAR1 (VTBX, vtbx3, v8qi) },
15083 { VAR1 (VTBX, vtbx4, v8qi) },
15084 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15085 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15086 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15087 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
15088 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
15089 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
15090 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
15091 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
15092 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
15093 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
15094 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
15095 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
15096 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
15097 { VAR10 (LOAD1, vld1,
15098 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15099 { VAR10 (LOAD1LANE, vld1_lane,
15100 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15101 { VAR10 (LOAD1, vld1_dup,
15102 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15103 { VAR10 (STORE1, vst1,
15104 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15105 { VAR10 (STORE1LANE, vst1_lane,
15106 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15107 { VAR9 (LOADSTRUCT,
15108 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15109 { VAR7 (LOADSTRUCTLANE, vld2_lane,
15110 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15111 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
15112 { VAR9 (STORESTRUCT, vst2,
15113 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15114 { VAR7 (STORESTRUCTLANE, vst2_lane,
15115 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15116 { VAR9 (LOADSTRUCT,
15117 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15118 { VAR7 (LOADSTRUCTLANE, vld3_lane,
15119 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15120 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
15121 { VAR9 (STORESTRUCT, vst3,
15122 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15123 { VAR7 (STORESTRUCTLANE, vst3_lane,
15124 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15125 { VAR9 (LOADSTRUCT, vld4,
15126 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15127 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15128 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15129 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15130 { VAR9 (STORESTRUCT, vst4,
15131 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15132 { VAR7 (STORESTRUCTLANE, vst4_lane,
15133 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15134 { VAR10 (LOGICBINOP, vand,
15135 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15136 { VAR10 (LOGICBINOP, vorr,
15137 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15138 { VAR10 (BINOP, veor,
15139 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15140 { VAR10 (LOGICBINOP, vbic,
15141 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15142 { VAR10 (LOGICBINOP, vorn,
15143 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15159 arm_init_neon_builtins (void)
15161 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15163 tree neon_intQI_type_node;
15164 tree neon_intHI_type_node;
15165 tree neon_polyQI_type_node;
15166 tree neon_polyHI_type_node;
15167 tree neon_intSI_type_node;
15168 tree neon_intDI_type_node;
15169 tree neon_float_type_node;
15171 tree intQI_pointer_node;
15172 tree intHI_pointer_node;
15173 tree intSI_pointer_node;
15174 tree intDI_pointer_node;
15175 tree float_pointer_node;
15177 tree const_intQI_node;
15178 tree const_intHI_node;
15179 tree const_intSI_node;
15180 tree const_intDI_node;
15181 tree const_float_node;
15183 tree const_intQI_pointer_node;
15184 tree const_intHI_pointer_node;
15185 tree const_intSI_pointer_node;
15186 tree const_intDI_pointer_node;
15187 tree const_float_pointer_node;
15189 tree V8QI_type_node;
15190 tree V4HI_type_node;
15191 tree V2SI_type_node;
15192 tree V2SF_type_node;
15193 tree V16QI_type_node;
15194 tree V8HI_type_node;
15195 tree V4SI_type_node;
15196 tree V4SF_type_node;
15197 tree V2DI_type_node;
15199 tree intUQI_type_node;
15200 tree intUHI_type_node;
15201 tree intUSI_type_node;
15202 tree intUDI_type_node;
15204 tree intEI_type_node;
15205 tree intOI_type_node;
15206 tree intCI_type_node;
15207 tree intXI_type_node;
15209 tree V8QI_pointer_node;
15210 tree V4HI_pointer_node;
15211 tree V2SI_pointer_node;
15212 tree V2SF_pointer_node;
15213 tree V16QI_pointer_node;
15214 tree V8HI_pointer_node;
15215 tree V4SI_pointer_node;
15216 tree V4SF_pointer_node;
15217 tree V2DI_pointer_node;
15219 tree void_ftype_pv8qi_v8qi_v8qi;
15220 tree void_ftype_pv4hi_v4hi_v4hi;
15221 tree void_ftype_pv2si_v2si_v2si;
15222 tree void_ftype_pv2sf_v2sf_v2sf;
15223 tree void_ftype_pdi_di_di;
15224 tree void_ftype_pv16qi_v16qi_v16qi;
15225 tree void_ftype_pv8hi_v8hi_v8hi;
15226 tree void_ftype_pv4si_v4si_v4si;
15227 tree void_ftype_pv4sf_v4sf_v4sf;
15228 tree void_ftype_pv2di_v2di_v2di;
15230 tree reinterp_ftype_dreg[5][5];
15231 tree reinterp_ftype_qreg[5][5];
15232 tree dreg_types[5], qreg_types[5];
15234 /* Create distinguished type nodes for NEON vector element types,
15235 and pointers to values of such types, so we can detect them later. */
15236 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15237 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15238 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15239 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15240 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15241 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15242 neon_float_type_node = make_node (REAL_TYPE);
15243 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15244 layout_type (neon_float_type_node);
15246 /* Define typedefs which exactly correspond to the modes we are basing vector
15247 types on. If you change these names you'll need to change
15248 the table used by arm_mangle_type too. */
15249 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15250 "__builtin_neon_qi");
15251 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15252 "__builtin_neon_hi");
15253 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15254 "__builtin_neon_si");
15255 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15256 "__builtin_neon_sf");
15257 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15258 "__builtin_neon_di");
15259 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15260 "__builtin_neon_poly8");
15261 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15262 "__builtin_neon_poly16");
15264 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15265 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15266 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15267 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15268 float_pointer_node = build_pointer_type (neon_float_type_node);
15270 /* Next create constant-qualified versions of the above types. */
15271 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15273 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15275 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15277 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15279 const_float_node = build_qualified_type (neon_float_type_node,
15282 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15283 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15284 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15285 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15286 const_float_pointer_node = build_pointer_type (const_float_node);
15288 /* Now create vector types based on our NEON element types. */
15289 /* 64-bit vectors. */
15291 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15293 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15295 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15297 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15298 /* 128-bit vectors. */
15300 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15302 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15304 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15306 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15308 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15310 /* Unsigned integer types for various mode sizes. */
15311 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15312 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15313 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15314 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15316 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15317 "__builtin_neon_uqi");
15318 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15319 "__builtin_neon_uhi");
15320 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15321 "__builtin_neon_usi");
15322 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15323 "__builtin_neon_udi");
15325 /* Opaque integer types for structures of vectors. */
15326 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15327 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15328 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15329 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15331 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15332 "__builtin_neon_ti");
15333 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15334 "__builtin_neon_ei");
15335 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15336 "__builtin_neon_oi");
15337 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15338 "__builtin_neon_ci");
15339 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15340 "__builtin_neon_xi");
15342 /* Pointers to vector types. */
15343 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15344 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15345 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15346 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15347 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15348 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15349 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15350 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15351 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15353 /* Operations which return results as pairs. */
15354 void_ftype_pv8qi_v8qi_v8qi =
15355 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15356 V8QI_type_node, NULL);
15357 void_ftype_pv4hi_v4hi_v4hi =
15358 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15359 V4HI_type_node, NULL);
15360 void_ftype_pv2si_v2si_v2si =
15361 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15362 V2SI_type_node, NULL);
15363 void_ftype_pv2sf_v2sf_v2sf =
15364 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15365 V2SF_type_node, NULL);
15366 void_ftype_pdi_di_di =
15367 build_function_type_list (void_type_node, intDI_pointer_node,
15368 neon_intDI_type_node, neon_intDI_type_node, NULL);
15369 void_ftype_pv16qi_v16qi_v16qi =
15370 build_function_type_list (void_type_node, V16QI_pointer_node,
15371 V16QI_type_node, V16QI_type_node, NULL);
15372 void_ftype_pv8hi_v8hi_v8hi =
15373 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15374 V8HI_type_node, NULL);
15375 void_ftype_pv4si_v4si_v4si =
15376 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15377 V4SI_type_node, NULL);
15378 void_ftype_pv4sf_v4sf_v4sf =
15379 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15380 V4SF_type_node, NULL);
15381 void_ftype_pv2di_v2di_v2di =
15382 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15383 V2DI_type_node, NULL);
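/* Note the shape shared by all of the signatures above:
   void (T *dest, T a, T b).  The two halves of the result are written
   through the pointer operand rather than returned, which is how the
   NEON_RESULTPAIR expansion below consumes these types.  */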
15385 dreg_types[0] = V8QI_type_node;
15386 dreg_types[1] = V4HI_type_node;
15387 dreg_types[2] = V2SI_type_node;
15388 dreg_types[3] = V2SF_type_node;
15389 dreg_types[4] = neon_intDI_type_node;
15391 qreg_types[0] = V16QI_type_node;
15392 qreg_types[1] = V8HI_type_node;
15393 qreg_types[2] = V4SI_type_node;
15394 qreg_types[3] = V4SF_type_node;
15395 qreg_types[4] = V2DI_type_node;
15397 for (i = 0; i < 5; i++)
15400 for (j = 0; j < 5; j++)
15402 reinterp_ftype_dreg[i][j]
15403 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15404 reinterp_ftype_qreg[i][j]
15405 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15409 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15411 neon_builtin_datum *d = &neon_builtin_data[i];
15412 unsigned int j, codeidx = 0;
15414 d->base_fcode = fcode;
15416 for (j = 0; j < T_MAX; j++)
15418 const char* const modenames[] = {
15419 "v8qi", "v4hi", "v2si", "v2sf", "di",
15420 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15424 enum insn_code icode;
15425 int is_load = 0, is_store = 0;
15427 if ((d->bits & (1 << j)) == 0)
15430 icode = d->codes[codeidx++];
15435 case NEON_LOAD1LANE:
15436 case NEON_LOADSTRUCT:
15437 case NEON_LOADSTRUCTLANE:
15439 /* Fall through. */
15441 case NEON_STORE1LANE:
15442 case NEON_STORESTRUCT:
15443 case NEON_STORESTRUCTLANE:
15446 /* Fall through. */
15449 case NEON_LOGICBINOP:
15450 case NEON_SHIFTINSERT:
15457 case NEON_SHIFTIMM:
15458 case NEON_SHIFTACC:
15464 case NEON_LANEMULL:
15465 case NEON_LANEMULH:
15467 case NEON_SCALARMUL:
15468 case NEON_SCALARMULL:
15469 case NEON_SCALARMULH:
15470 case NEON_SCALARMAC:
15476 tree return_type = void_type_node, args = void_list_node;
15478 /* Build a function type directly from the insn_data for this
15479 builtin. The build_function_type() function takes care of
15480 removing duplicates for us. */
15481 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15485 if (is_load && k == 1)
15487 /* Neon load patterns always have the memory operand
15488 (a SImode pointer) in the operand 1 position. We
15489 want a const pointer to the element type in that position.  */
15491 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15497 eltype = const_intQI_pointer_node;
15502 eltype = const_intHI_pointer_node;
15507 eltype = const_intSI_pointer_node;
15512 eltype = const_float_pointer_node;
15517 eltype = const_intDI_pointer_node;
15520 default: gcc_unreachable ();
15523 else if (is_store && k == 0)
15525 /* Similarly, Neon store patterns use operand 0 as
15526 the memory location to store to (a SImode pointer).
15527 Use a pointer to the element type of the store in that position.  */
15529 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15535 eltype = intQI_pointer_node;
15540 eltype = intHI_pointer_node;
15545 eltype = intSI_pointer_node;
15550 eltype = float_pointer_node;
15555 eltype = intDI_pointer_node;
15558 default: gcc_unreachable ();
15563 switch (insn_data[icode].operand[k].mode)
15565 case VOIDmode: eltype = void_type_node; break;
15567 case QImode: eltype = neon_intQI_type_node; break;
15568 case HImode: eltype = neon_intHI_type_node; break;
15569 case SImode: eltype = neon_intSI_type_node; break;
15570 case SFmode: eltype = neon_float_type_node; break;
15571 case DImode: eltype = neon_intDI_type_node; break;
15572 case TImode: eltype = intTI_type_node; break;
15573 case EImode: eltype = intEI_type_node; break;
15574 case OImode: eltype = intOI_type_node; break;
15575 case CImode: eltype = intCI_type_node; break;
15576 case XImode: eltype = intXI_type_node; break;
15577 /* 64-bit vectors. */
15578 case V8QImode: eltype = V8QI_type_node; break;
15579 case V4HImode: eltype = V4HI_type_node; break;
15580 case V2SImode: eltype = V2SI_type_node; break;
15581 case V2SFmode: eltype = V2SF_type_node; break;
15582 /* 128-bit vectors. */
15583 case V16QImode: eltype = V16QI_type_node; break;
15584 case V8HImode: eltype = V8HI_type_node; break;
15585 case V4SImode: eltype = V4SI_type_node; break;
15586 case V4SFmode: eltype = V4SF_type_node; break;
15587 case V2DImode: eltype = V2DI_type_node; break;
15588 default: gcc_unreachable ();
15592 if (k == 0 && !is_store)
15593 return_type = eltype;
15595 args = tree_cons (NULL_TREE, eltype, args);
15598 ftype = build_function_type (return_type, args);
15602 case NEON_RESULTPAIR:
15604 switch (insn_data[icode].operand[1].mode)
15606 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15607 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15608 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15609 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15610 case DImode: ftype = void_ftype_pdi_di_di; break;
15611 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15612 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15613 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15614 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15615 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15616 default: gcc_unreachable ();
15621 case NEON_REINTERP:
15623 /* We iterate over 5 doubleword types, then 5 quadword types.  */
15626 switch (insn_data[icode].operand[0].mode)
15628 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15629 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15630 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15631 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15632 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15633 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15634 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15635 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15636 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15637 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15638 default: gcc_unreachable ();
15644 gcc_unreachable ();
15647 gcc_assert (ftype != NULL);
15649 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15651 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
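/* As a concrete example, the "vand" entry in the table above,
   instantiated for its v8qi variant, registers the function
   "__builtin_neon_vandv8qi"; FCODE identifies that exact
   (builtin, mode) combination.  */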
15658 arm_init_builtins (void)
15660 arm_init_tls_builtins ();
15662 if (TARGET_REALLY_IWMMXT)
15663 arm_init_iwmmxt_builtins ();
15666 arm_init_neon_builtins ();
15669 /* Errors in the source file can cause expand_expr to return const0_rtx
15670 where we expect a vector. To avoid crashing, use one of the vector
15671 clear instructions. */
15674 safe_vector_operand (rtx x, enum machine_mode mode)
15676 if (x != const0_rtx)
15678 x = gen_reg_rtx (mode);
15680 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15681 : gen_rtx_SUBREG (DImode, x, 0)));
15685 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15688 arm_expand_binop_builtin (enum insn_code icode,
15689 tree exp, rtx target)
15692 tree arg0 = CALL_EXPR_ARG (exp, 0);
15693 tree arg1 = CALL_EXPR_ARG (exp, 1);
15694 rtx op0 = expand_normal (arg0);
15695 rtx op1 = expand_normal (arg1);
15696 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15697 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15698 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15700 if (VECTOR_MODE_P (mode0))
15701 op0 = safe_vector_operand (op0, mode0);
15702 if (VECTOR_MODE_P (mode1))
15703 op1 = safe_vector_operand (op1, mode1);
15706 || GET_MODE (target) != tmode
15707 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15708 target = gen_reg_rtx (tmode);
15710 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15712 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15713 op0 = copy_to_mode_reg (mode0, op0);
15714 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15715 op1 = copy_to_mode_reg (mode1, op1);
15717 pat = GEN_FCN (icode) (target, op0, op1);
15724 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15727 arm_expand_unop_builtin (enum insn_code icode,
15728 tree exp, rtx target, int do_load)
15731 tree arg0 = CALL_EXPR_ARG (exp, 0);
15732 rtx op0 = expand_normal (arg0);
15733 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15734 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15737 || GET_MODE (target) != tmode
15738 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15739 target = gen_reg_rtx (tmode);
15741 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15744 if (VECTOR_MODE_P (mode0))
15745 op0 = safe_vector_operand (op0, mode0);
15747 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15748 op0 = copy_to_mode_reg (mode0, op0);
15751 pat = GEN_FCN (icode) (target, op0);
15759 neon_builtin_compare (const void *a, const void *b)
15761 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
15762 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
15763 unsigned int soughtcode = key->base_fcode;
15765 if (soughtcode >= memb->base_fcode
15766 && soughtcode < memb->base_fcode + memb->num_vars)
15768 else if (soughtcode < memb->base_fcode)
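/* The comparator treats each table entry as owning the half-open range
   [base_fcode, base_fcode + num_vars) of function codes and reports a
   match for any code that falls inside it, so the bsearch in
   locate_neon_builtin_icode below can look up an interval rather than a
   single value.  */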
15774 static enum insn_code
15775 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15777 neon_builtin_datum key, *found;
15780 key.base_fcode = fcode;
15781 found = (neon_builtin_datum *)
15782 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15783 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15784 gcc_assert (found);
15785 idx = fcode - (int) found->base_fcode;
15786 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15789 *itype = found->itype;
15791 return found->codes[idx];
15795 NEON_ARG_COPY_TO_REG,
15800 #define NEON_MAX_BUILTIN_ARGS 5
15802 /* Expand a Neon builtin. */
15804 arm_expand_neon_args (rtx target, int icode, int have_retval,
15809 tree arg[NEON_MAX_BUILTIN_ARGS];
15810 rtx op[NEON_MAX_BUILTIN_ARGS];
15811 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15812 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15817 || GET_MODE (target) != tmode
15818 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15819 target = gen_reg_rtx (tmode);
15821 va_start (ap, exp);
15825 builtin_arg thisarg = va_arg (ap, int);
15827 if (thisarg == NEON_ARG_STOP)
15831 arg[argc] = CALL_EXPR_ARG (exp, argc);
15832 op[argc] = expand_normal (arg[argc]);
15833 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15837 case NEON_ARG_COPY_TO_REG:
15838 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15839 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15840 (op[argc], mode[argc]))
15841 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15844 case NEON_ARG_CONSTANT:
15845 /* FIXME: This error message is somewhat unhelpful. */
15846 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15847 (op[argc], mode[argc]))
15848 error ("argument must be a constant");
15851 case NEON_ARG_STOP:
15852 gcc_unreachable ();
15865 pat = GEN_FCN (icode) (target, op[0]);
15869 pat = GEN_FCN (icode) (target, op[0], op[1]);
15873 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15877 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15881 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15885 gcc_unreachable ();
15891 pat = GEN_FCN (icode) (op[0]);
15895 pat = GEN_FCN (icode) (op[0], op[1]);
15899 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15903 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15907 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15911 gcc_unreachable ();
15922 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15923 constants defined per-instruction or per instruction-variant. Instead, the
15924 required info is looked up in the table neon_builtin_data. */
15926 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15929 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15936 return arm_expand_neon_args (target, icode, 1, exp,
15937 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15941 case NEON_SCALARMUL:
15942 case NEON_SCALARMULL:
15943 case NEON_SCALARMULH:
15944 case NEON_SHIFTINSERT:
15945 case NEON_LOGICBINOP:
15946 return arm_expand_neon_args (target, icode, 1, exp,
15947 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15951 return arm_expand_neon_args (target, icode, 1, exp,
15952 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15953 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15957 case NEON_SHIFTIMM:
15958 return arm_expand_neon_args (target, icode, 1, exp,
15959 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15963 return arm_expand_neon_args (target, icode, 1, exp,
15964 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15968 case NEON_REINTERP:
15969 return arm_expand_neon_args (target, icode, 1, exp,
15970 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15974 return arm_expand_neon_args (target, icode, 1, exp,
15975 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15977 case NEON_RESULTPAIR:
15978 return arm_expand_neon_args (target, icode, 0, exp,
15979 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15983 case NEON_LANEMULL:
15984 case NEON_LANEMULH:
15985 return arm_expand_neon_args (target, icode, 1, exp,
15986 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15987 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15990 return arm_expand_neon_args (target, icode, 1, exp,
15991 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15992 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15994 case NEON_SHIFTACC:
15995 return arm_expand_neon_args (target, icode, 1, exp,
15996 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15997 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15999 case NEON_SCALARMAC:
16000 return arm_expand_neon_args (target, icode, 1, exp,
16001 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16002 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16006 return arm_expand_neon_args (target, icode, 1, exp,
16007 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16011 case NEON_LOADSTRUCT:
16012 return arm_expand_neon_args (target, icode, 1, exp,
16013 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16015 case NEON_LOAD1LANE:
16016 case NEON_LOADSTRUCTLANE:
16017 return arm_expand_neon_args (target, icode, 1, exp,
16018 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16022 case NEON_STORESTRUCT:
16023 return arm_expand_neon_args (target, icode, 0, exp,
16024 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16026 case NEON_STORE1LANE:
16027 case NEON_STORESTRUCTLANE:
16028 return arm_expand_neon_args (target, icode, 0, exp,
16029 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16033 gcc_unreachable ();
16036 /* Emit code to reinterpret one Neon type as another, without altering bits. */
16038 neon_reinterpret (rtx dest, rtx src)
16040 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
16043 /* Emit code to place a Neon pair result in memory locations (with equal registers).  */
16046 neon_emit_pair_result_insn (enum machine_mode mode,
16047 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
16050 rtx mem = gen_rtx_MEM (mode, destaddr);
16051 rtx tmp1 = gen_reg_rtx (mode);
16052 rtx tmp2 = gen_reg_rtx (mode);
16054 emit_insn (intfn (tmp1, op1, tmp2, op2));
16056 emit_move_insn (mem, tmp1);
16057 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
16058 emit_move_insn (mem, tmp2);
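/* As an illustration, for MODE == V2SImode (8 bytes) the intrinsic fills
   TMP1 and TMP2 and the two moves above store them at DESTADDR and
   DESTADDR + 8, i.e. in consecutive GET_MODE_SIZE (mode) slots.  */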
16061 /* Set up operands for a register copy from src to dest, taking care not to
16062 clobber registers in the process.
16063 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
16064 be called with a large N, so that should be OK. */
16067 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
16069 unsigned int copied = 0, opctr = 0;
16070 unsigned int done = (1 << count) - 1;
16073 while (copied != done)
16075 for (i = 0; i < count; i++)
16079 for (j = 0; good && j < count; j++)
16080 if (i != j && (copied & (1 << j)) == 0
16081 && reg_overlap_mentioned_p (src[j], dest[i]))
16086 operands[opctr++] = dest[i];
16087 operands[opctr++] = src[i];
16093 gcc_assert (opctr == count * 2);
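/* Worked example: with dest = {d1, d2} and src = {d0, d1}, the first
   sweep skips d1 <- d0 (its destination overlaps the still-pending
   source d1) and schedules d2 <- d1; the next sweep can then schedule
   d1 <- d0, giving a clobber-free copy order.  */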
16096 /* Expand an expression EXP that calls a built-in function,
16097 with result going to TARGET if that's convenient
16098 (and in mode MODE if that's convenient).
16099 SUBTARGET may be used as the target for computing one of EXP's operands.
16100 IGNORE is nonzero if the value is to be ignored. */
16103 arm_expand_builtin (tree exp,
16105 rtx subtarget ATTRIBUTE_UNUSED,
16106 enum machine_mode mode ATTRIBUTE_UNUSED,
16107 int ignore ATTRIBUTE_UNUSED)
16109 const struct builtin_description * d;
16110 enum insn_code icode;
16111 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16119 int fcode = DECL_FUNCTION_CODE (fndecl);
16121 enum machine_mode tmode;
16122 enum machine_mode mode0;
16123 enum machine_mode mode1;
16124 enum machine_mode mode2;
16126 if (fcode >= ARM_BUILTIN_NEON_BASE)
16127 return arm_expand_neon_builtin (fcode, exp, target);
16131 case ARM_BUILTIN_TEXTRMSB:
16132 case ARM_BUILTIN_TEXTRMUB:
16133 case ARM_BUILTIN_TEXTRMSH:
16134 case ARM_BUILTIN_TEXTRMUH:
16135 case ARM_BUILTIN_TEXTRMSW:
16136 case ARM_BUILTIN_TEXTRMUW:
16137 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16138 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16139 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16140 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16141 : CODE_FOR_iwmmxt_textrmw);
16143 arg0 = CALL_EXPR_ARG (exp, 0);
16144 arg1 = CALL_EXPR_ARG (exp, 1);
16145 op0 = expand_normal (arg0);
16146 op1 = expand_normal (arg1);
16147 tmode = insn_data[icode].operand[0].mode;
16148 mode0 = insn_data[icode].operand[1].mode;
16149 mode1 = insn_data[icode].operand[2].mode;
16151 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16152 op0 = copy_to_mode_reg (mode0, op0);
16153 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16155 /* @@@ better error message */
16156 error ("selector must be an immediate");
16157 return gen_reg_rtx (tmode);
16160 || GET_MODE (target) != tmode
16161 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16162 target = gen_reg_rtx (tmode);
16163 pat = GEN_FCN (icode) (target, op0, op1);
16169 case ARM_BUILTIN_TINSRB:
16170 case ARM_BUILTIN_TINSRH:
16171 case ARM_BUILTIN_TINSRW:
16172 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16173 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16174 : CODE_FOR_iwmmxt_tinsrw);
16175 arg0 = CALL_EXPR_ARG (exp, 0);
16176 arg1 = CALL_EXPR_ARG (exp, 1);
16177 arg2 = CALL_EXPR_ARG (exp, 2);
16178 op0 = expand_normal (arg0);
16179 op1 = expand_normal (arg1);
16180 op2 = expand_normal (arg2);
16181 tmode = insn_data[icode].operand[0].mode;
16182 mode0 = insn_data[icode].operand[1].mode;
16183 mode1 = insn_data[icode].operand[2].mode;
16184 mode2 = insn_data[icode].operand[3].mode;
16186 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16187 op0 = copy_to_mode_reg (mode0, op0);
16188 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16189 op1 = copy_to_mode_reg (mode1, op1);
16190 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16192 /* @@@ better error message */
16193 error ("selector must be an immediate");
16197 || GET_MODE (target) != tmode
16198 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16199 target = gen_reg_rtx (tmode);
16200 pat = GEN_FCN (icode) (target, op0, op1, op2);
16206 case ARM_BUILTIN_SETWCX:
16207 arg0 = CALL_EXPR_ARG (exp, 0);
16208 arg1 = CALL_EXPR_ARG (exp, 1);
16209 op0 = force_reg (SImode, expand_normal (arg0));
16210 op1 = expand_normal (arg1);
16211 emit_insn (gen_iwmmxt_tmcr (op1, op0));
16214 case ARM_BUILTIN_GETWCX:
16215 arg0 = CALL_EXPR_ARG (exp, 0);
16216 op0 = expand_normal (arg0);
16217 target = gen_reg_rtx (SImode);
16218 emit_insn (gen_iwmmxt_tmrc (target, op0));
16221 case ARM_BUILTIN_WSHUFH:
16222 icode = CODE_FOR_iwmmxt_wshufh;
16223 arg0 = CALL_EXPR_ARG (exp, 0);
16224 arg1 = CALL_EXPR_ARG (exp, 1);
16225 op0 = expand_normal (arg0);
16226 op1 = expand_normal (arg1);
16227 tmode = insn_data[icode].operand[0].mode;
16228 mode1 = insn_data[icode].operand[1].mode;
16229 mode2 = insn_data[icode].operand[2].mode;
16231 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16232 op0 = copy_to_mode_reg (mode1, op0);
16233 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16235 /* @@@ better error message */
16236 error ("mask must be an immediate");
16240 || GET_MODE (target) != tmode
16241 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16242 target = gen_reg_rtx (tmode);
16243 pat = GEN_FCN (icode) (target, op0, op1);
16249 case ARM_BUILTIN_WSADB:
16250 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
16251 case ARM_BUILTIN_WSADH:
16252 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
16253 case ARM_BUILTIN_WSADBZ:
16254 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
16255 case ARM_BUILTIN_WSADHZ:
16256 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
16258 /* Several three-argument builtins. */
16259 case ARM_BUILTIN_WMACS:
16260 case ARM_BUILTIN_WMACU:
16261 case ARM_BUILTIN_WALIGN:
16262 case ARM_BUILTIN_TMIA:
16263 case ARM_BUILTIN_TMIAPH:
16264 case ARM_BUILTIN_TMIATT:
16265 case ARM_BUILTIN_TMIATB:
16266 case ARM_BUILTIN_TMIABT:
16267 case ARM_BUILTIN_TMIABB:
16268 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16269 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16270 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16271 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16272 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16273 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16274 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16275 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16276 : CODE_FOR_iwmmxt_walign);
16277 arg0 = CALL_EXPR_ARG (exp, 0);
16278 arg1 = CALL_EXPR_ARG (exp, 1);
16279 arg2 = CALL_EXPR_ARG (exp, 2);
16280 op0 = expand_normal (arg0);
16281 op1 = expand_normal (arg1);
16282 op2 = expand_normal (arg2);
16283 tmode = insn_data[icode].operand[0].mode;
16284 mode0 = insn_data[icode].operand[1].mode;
16285 mode1 = insn_data[icode].operand[2].mode;
16286 mode2 = insn_data[icode].operand[3].mode;
16288 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16289 op0 = copy_to_mode_reg (mode0, op0);
16290 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16291 op1 = copy_to_mode_reg (mode1, op1);
16292 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16293 op2 = copy_to_mode_reg (mode2, op2);
16295 || GET_MODE (target) != tmode
16296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16297 target = gen_reg_rtx (tmode);
16298 pat = GEN_FCN (icode) (target, op0, op1, op2);
16304 case ARM_BUILTIN_WZERO:
16305 target = gen_reg_rtx (DImode);
16306 emit_insn (gen_iwmmxt_clrdi (target));
16309 case ARM_BUILTIN_THREAD_POINTER:
16310 return arm_load_tp (target);
16316 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16317 if (d->code == (const enum arm_builtins) fcode)
16318 return arm_expand_binop_builtin (d->icode, exp, target);
16320 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16321 if (d->code == (const enum arm_builtins) fcode)
16322 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16324 /* @@@ Should really do something sensible here. */
16328 /* Return the number (counting from 0) of
16329 the least significant set bit in MASK. */
16332 number_of_first_bit_set (unsigned mask)
16337 (mask & (1 << bit)) == 0;
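/* For example, number_of_first_bit_set (0x28) is 3, since 0x28 is binary
   101000.  A minimal standalone sketch of the same scan (illustrative
   only; the helper name is hypothetical):  */
#if 0
static int
lsb_index (unsigned mask)
{
  int bit = 0;
  while ((mask & (1u << bit)) == 0)	/* Skip the clear low bits.  */
    bit++;
  return bit;				/* lsb_index (0x28) == 3.  */
}
#endif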
16344 /* Emit code to push or pop registers to or from the stack. F is the
16345 assembly file. MASK is the registers to push or pop. PUSH is
16346 nonzero if we should push, and zero if we should pop. For debugging
16347 output, if pushing, adjust CFA_OFFSET by the amount of space added
16348 to the stack. REAL_REGS should have the same number of bits set as
16349 MASK, and will be used instead (in the same order) to describe which
16350 registers were saved - this is used to mark the save slots when we
16351 push high registers after moving them to low registers. */
16353 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16354 unsigned long real_regs)
16357 int lo_mask = mask & 0xFF;
16358 int pushed_words = 0;
16362 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16364 /* Special case.  Do not generate a POP PC statement here, do it in thumb_exit.  */
16366 thumb_exit (f, -1);
16370 if (ARM_EABI_UNWIND_TABLES && push)
16372 fprintf (f, "\t.save\t{");
16373 for (regno = 0; regno < 15; regno++)
16375 if (real_regs & (1 << regno))
16377 if (real_regs & ((1 << regno) -1))
16379 asm_fprintf (f, "%r", regno);
16382 fprintf (f, "}\n");
16385 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16387 /* Look at the low registers first. */
16388 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16392 asm_fprintf (f, "%r", regno);
16394 if ((lo_mask & ~1) != 0)
16401 if (push && (mask & (1 << LR_REGNUM)))
16403 /* Catch pushing the LR. */
16407 asm_fprintf (f, "%r", LR_REGNUM);
16411 else if (!push && (mask & (1 << PC_REGNUM)))
16413 /* Catch popping the PC. */
16414 if (TARGET_INTERWORK || TARGET_BACKTRACE
16415 || crtl->calls_eh_return)
16417 /* The PC is never popped directly; instead
16418 it is popped into r3 and then BX is used.  */
16419 fprintf (f, "}\n");
16421 thumb_exit (f, -1);
16430 asm_fprintf (f, "%r", PC_REGNUM);
16434 fprintf (f, "}\n");
16436 if (push && pushed_words && dwarf2out_do_frame ())
16438 char *l = dwarf2out_cfi_label ();
16439 int pushed_mask = real_regs;
16441 *cfa_offset += pushed_words * 4;
16442 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16445 pushed_mask = real_regs;
16446 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16448 if (pushed_mask & 1)
16449 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16454 /* Generate code to return from a thumb function.
16455 If 'reg_containing_return_addr' is -1, then the return address is
16456 actually on the stack, at the stack pointer. */
16458 thumb_exit (FILE *f, int reg_containing_return_addr)
16460 unsigned regs_available_for_popping;
16461 unsigned regs_to_pop;
16463 unsigned available;
16467 int restore_a4 = FALSE;
16469 /* Compute the registers we need to pop. */
16473 if (reg_containing_return_addr == -1)
16475 regs_to_pop |= 1 << LR_REGNUM;
16479 if (TARGET_BACKTRACE)
16481 /* Restore the (ARM) frame pointer and stack pointer. */
16482 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16486 /* If there is nothing to pop then just emit the BX instruction and return.  */
16488 if (pops_needed == 0)
16490 if (crtl->calls_eh_return)
16491 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16493 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16496 /* Otherwise if we are not supporting interworking and we have not created
16497 a backtrace structure and the function was not entered in ARM mode then
16498 just pop the return address straight into the PC. */
16499 else if (!TARGET_INTERWORK
16500 && !TARGET_BACKTRACE
16501 && !is_called_in_ARM_mode (current_function_decl)
16502 && !crtl->calls_eh_return)
16504 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16508 /* Find out how many of the (return) argument registers we can corrupt. */
16509 regs_available_for_popping = 0;
16511 /* If returning via __builtin_eh_return, the bottom three registers
16512 all contain information needed for the return. */
16513 if (crtl->calls_eh_return)
16517 /* Where possible, deduce the registers used from the function's
16518 return value.  This is more reliable than examining
16519 df_regs_ever_live_p () because that will be set if the register is
16520 ever used in the function, not just if the register is used
16521 to hold a return value. */
16523 if (crtl->return_rtx != 0)
16524 mode = GET_MODE (crtl->return_rtx);
16526 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16528 size = GET_MODE_SIZE (mode);
16532 /* In a void function we can use any argument register.
16533 In a function that returns a structure on the stack
16534 we can use the second and third argument registers. */
16535 if (mode == VOIDmode)
16536 regs_available_for_popping =
16537 (1 << ARG_REGISTER (1))
16538 | (1 << ARG_REGISTER (2))
16539 | (1 << ARG_REGISTER (3));
16541 regs_available_for_popping =
16542 (1 << ARG_REGISTER (2))
16543 | (1 << ARG_REGISTER (3));
16545 else if (size <= 4)
16546 regs_available_for_popping =
16547 (1 << ARG_REGISTER (2))
16548 | (1 << ARG_REGISTER (3));
16549 else if (size <= 8)
16550 regs_available_for_popping =
16551 (1 << ARG_REGISTER (3));
16554 /* Match registers to be popped with registers into which we pop them. */
16555 for (available = regs_available_for_popping,
16556 required = regs_to_pop;
16557 required != 0 && available != 0;
16558 available &= ~(available & - available),
16559 required &= ~(required & - required))
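/* In the loop above, X & -X isolates the lowest set bit of X
   (e.g. 6 & -6 == 2), so each iteration pairs off the lowest remaining
   register to pop with the lowest remaining register available for
   popping, then clears both bits from their masks.  */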
16562 /* If we have any popping registers left over, remove them. */
16564 regs_available_for_popping &= ~available;
16566 /* Otherwise if we need another popping register we can use
16567 the fourth argument register. */
16568 else if (pops_needed)
16570 /* If we have not found any free argument registers and
16571 reg a4 contains the return address, we must move it. */
16572 if (regs_available_for_popping == 0
16573 && reg_containing_return_addr == LAST_ARG_REGNUM)
16575 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16576 reg_containing_return_addr = LR_REGNUM;
16578 else if (size > 12)
16580 /* Register a4 is being used to hold part of the return value,
16581 but we have dire need of a free, low register. */
16584 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16587 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16589 /* The fourth argument register is available. */
16590 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16596 /* Pop as many registers as we can. */
16597 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16598 regs_available_for_popping);
16600 /* Process the registers we popped. */
16601 if (reg_containing_return_addr == -1)
16603 /* The return address was popped into the lowest numbered register. */
16604 regs_to_pop &= ~(1 << LR_REGNUM);
16606 reg_containing_return_addr =
16607 number_of_first_bit_set (regs_available_for_popping);
16609 /* Remove this register from the mask of available registers, so that
16610 the return address will not be corrupted by further pops. */
16611 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16614 /* If we popped other registers then handle them here. */
16615 if (regs_available_for_popping)
16619 /* Work out which register currently contains the frame pointer. */
16620 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16622 /* Move it into the correct place. */
16623 asm_fprintf (f, "\tmov\t%r, %r\n",
16624 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16626 /* (Temporarily) remove it from the mask of popped registers. */
16627 regs_available_for_popping &= ~(1 << frame_pointer);
16628 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16630 if (regs_available_for_popping)
16634 /* We popped the stack pointer as well,
16635 find the register that contains it. */
16636 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16638 /* Move it into the stack register. */
16639 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16641 /* At this point we have popped all necessary registers, so
16642 do not worry about restoring regs_available_for_popping
16643 to its correct value:
16645 assert (pops_needed == 0)
16646 assert (regs_available_for_popping == (1 << frame_pointer))
16647 assert (regs_to_pop == (1 << STACK_POINTER)) */
16651 /* Since we have just moved the popped value into the frame
16652 pointer, the popping register is available for reuse, and
16653 we know that we still have the stack pointer left to pop. */
16654 regs_available_for_popping |= (1 << frame_pointer);
16658 /* If we still have registers left on the stack, but we no longer have
16659 any registers into which we can pop them, then we must move the return
16660 address into the link register and make available the register that held it.  */
16662 if (regs_available_for_popping == 0 && pops_needed > 0)
16664 regs_available_for_popping |= 1 << reg_containing_return_addr;
16666 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16667 reg_containing_return_addr);
16669 reg_containing_return_addr = LR_REGNUM;
16672 /* If we have registers left on the stack then pop some more.
16673 We know that at most we will want to pop FP and SP. */
16674 if (pops_needed > 0)
16679 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16680 regs_available_for_popping);
16682 /* We have popped either FP or SP.
16683 Move whichever one it is into the correct register. */
16684 popped_into = number_of_first_bit_set (regs_available_for_popping);
16685 move_to = number_of_first_bit_set (regs_to_pop);
16687 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16689 regs_to_pop &= ~(1 << move_to);
16694 /* If we still have not popped everything then we must have only
16695 had one register available to us and we are now popping the SP. */
16696 if (pops_needed > 0)
16700 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16701 regs_available_for_popping);
16703 popped_into = number_of_first_bit_set (regs_available_for_popping);
16705 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16707 assert (regs_to_pop == (1 << STACK_POINTER))
16708 assert (pops_needed == 1)
16712 /* If necessary restore the a4 register. */
16715 if (reg_containing_return_addr != LR_REGNUM)
16717 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16718 reg_containing_return_addr = LR_REGNUM;
16721 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16724 if (crtl->calls_eh_return)
16725 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16727 /* Return to caller. */
16728 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16733 thumb1_final_prescan_insn (rtx insn)
16735 if (flag_print_asm_name)
16736 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16737 INSN_ADDRESSES (INSN_UID (insn)));
16741 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16743 unsigned HOST_WIDE_INT mask = 0xff;
16746 if (val == 0) /* XXX */
16749 for (i = 0; i < 25; i++)
16750 if ((val & (mask << i)) == val)
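/* Worked example: VAL == 0x3FC00 is 0xFF << 10, so the test above
   succeeds at I == 10 and the constant is reachable from a shifted
   8-bit immediate; VAL == 0x101 spans nine bit positions, so no shift
   of the 0xff mask covers it.  */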
16756 /* Returns nonzero if the current function contains,
16757 or might contain a far jump. */
16759 thumb_far_jump_used_p (void)
16763 /* This test is only important for leaf functions. */
16764 /* assert (!leaf_function_p ()); */
16766 /* If we have already decided that far jumps may be used,
16767 do not bother checking again, and always return true even if
16768 it turns out that they are not being used. Once we have made
16769 the decision that far jumps are present (and that hence the link
16770 register will be pushed onto the stack) we cannot go back on it. */
16771 if (cfun->machine->far_jump_used)
16774 /* If this function is not being called from the prologue/epilogue
16775 generation code then it must be being called from the
16776 INITIAL_ELIMINATION_OFFSET macro. */
16777 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16779 /* In this case we know that we are being asked about the elimination
16780 of the arg pointer register. If that register is not being used,
16781 then there are no arguments on the stack, and we do not have to
16782 worry that a far jump might force the prologue to push the link
16783 register, changing the stack offsets. In this case we can just
16784 return false, since the presence of far jumps in the function will
16785 not affect stack offsets.
16787 If the arg pointer is live (or if it was live, but has now been
16788 eliminated and so set to dead) then we do have to test to see if
16789 the function might contain a far jump. This test can lead to some
16790 false negatives, since before reload is completed, the length of
16791 branch instructions is not known, so gcc defaults to returning their
16792 longest length, which in turn sets the far jump attribute to true.
16794 A false negative will not result in bad code being generated, but it
16795 will result in a needless push and pop of the link register. We
16796 hope that this does not occur too often.
16798 If we need doubleword stack alignment this could affect the other
16799 elimination offsets so we can't risk getting it wrong. */
16800 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16801 cfun->machine->arg_pointer_live = 1;
16802 else if (!cfun->machine->arg_pointer_live)
16806 /* Check to see if the function contains a branch
16807 insn with the far jump attribute set. */
16808 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16810 if (GET_CODE (insn) == JUMP_INSN
16811 /* Ignore tablejump patterns. */
16812 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16813 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16814 && get_attr_far_jump (insn) == FAR_JUMP_YES
16817 /* Record the fact that we have decided that
16818 the function does use far jumps. */
16819 cfun->machine->far_jump_used = 1;
16827 /* Return nonzero if FUNC must be entered in ARM mode. */
16829 is_called_in_ARM_mode (tree func)
16831 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16833 /* Ignore the problem about functions whose address is taken. */
16834 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16838 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16844 /* The bits which aren't usefully expanded as rtl. */
16846 thumb_unexpanded_epilogue (void)
16848 arm_stack_offsets *offsets;
16850 unsigned long live_regs_mask = 0;
16851 int high_regs_pushed = 0;
16852 int had_to_push_lr;
16855 if (return_used_this_function)
16858 if (IS_NAKED (arm_current_func_type ()))
16861 offsets = arm_get_frame_offsets ();
16862 live_regs_mask = offsets->saved_regs_mask;
16863 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16865 /* Where possible, deduce the registers used from the function's return value.
16866 This is more reliable than examining df_regs_ever_live_p () because that
16867 will be set if the register is ever used in the function, not just if
16868 the register is used to hold a return value. */
16869 size = arm_size_return_regs ();
16871 /* The prolog may have pushed some high registers to use as
16872 work registers. e.g. the testsuite file:
16873 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16874 compiles to produce:
16875 push {r4, r5, r6, r7, lr}
16879 as part of the prolog. We have to undo that pushing here. */
16881 if (high_regs_pushed)
16883 unsigned long mask = live_regs_mask & 0xff;
16886 /* The available low registers depend on the size of the value we are returning.  */
16894 /* Oh dear!  We have no low registers into which we can pop the high registers!  */
16897 ("no low registers available for popping high registers");
16899 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16900 if (live_regs_mask & (1 << next_hi_reg))
16903 while (high_regs_pushed)
16905 /* Find lo register(s) into which the high register(s) can be popped.  */
16907 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16909 if (mask & (1 << regno))
16910 high_regs_pushed--;
16911 if (high_regs_pushed == 0)
16915 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16917 /* Pop the values into the low register(s). */
16918 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16920 /* Move the value(s) into the high registers. */
16921 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16923 if (mask & (1 << regno))
16925 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16928 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16929 if (live_regs_mask & (1 << next_hi_reg))
16934 live_regs_mask &= ~0x0f00;
16937 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16938 live_regs_mask &= 0xff;
16940 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
16942 /* Pop the return address into the PC. */
16943 if (had_to_push_lr)
16944 live_regs_mask |= 1 << PC_REGNUM;
16946 /* Either no argument registers were pushed or a backtrace
16947 structure was created which includes an adjusted stack
16948 pointer, so just pop everything. */
16949 if (live_regs_mask)
16950 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16953 /* We have either just popped the return address into the
16954 PC or it was kept in LR for the entire function.  */
16955 if (!had_to_push_lr)
16956 thumb_exit (asm_out_file, LR_REGNUM);
16960 /* Pop everything but the return address. */
16961 if (live_regs_mask)
16962 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16965 if (had_to_push_lr)
16969 /* We have no free low regs, so save one. */
16970 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16974 /* Get the return address into a temporary register. */
16975 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16976 1 << LAST_ARG_REGNUM);
16980 /* Move the return address to lr. */
16981 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16983 /* Restore the low register. */
16984 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16989 regno = LAST_ARG_REGNUM;
16994 /* Remove the argument registers that were pushed onto the stack. */
16995 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16996 SP_REGNUM, SP_REGNUM,
16997 crtl->args.pretend_args_size);
16999 thumb_exit (asm_out_file, regno);
17005 /* Functions to save and restore machine-specific function data. */
17006 static struct machine_function *
17007 arm_init_machine_status (void)
17009 struct machine_function *machine;
17010 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
17012 #if ARM_FT_UNKNOWN != 0
17013 machine->func_type = ARM_FT_UNKNOWN;
17018 /* Return an RTX indicating where the return address to the
17019 calling function can be found. */
17021 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
17026 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
17029 /* Do anything needed before RTL is emitted for each function. */
17031 arm_init_expanders (void)
17033 /* Arrange to initialize and mark the machine per-function status. */
17034 init_machine_status = arm_init_machine_status;
17036 /* This is to stop the combine pass optimizing away the alignment
17037 adjustment of va_arg. */
17038 /* ??? It is claimed that this should not be necessary. */
17040 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
17044 /* Like arm_compute_initial_elimination_offset.  Simpler because there
17045 isn't an ABI specified frame pointer for Thumb. Instead, we set it
17046 to point at the base of the local variables after static stack
17047 space for a function has been allocated. */
17050 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17052 arm_stack_offsets *offsets;
17054 offsets = arm_get_frame_offsets ();
17058 case ARG_POINTER_REGNUM:
17061 case STACK_POINTER_REGNUM:
17062 return offsets->outgoing_args - offsets->saved_args;
17064 case FRAME_POINTER_REGNUM:
17065 return offsets->soft_frame - offsets->saved_args;
17067 case ARM_HARD_FRAME_POINTER_REGNUM:
17068 return offsets->saved_regs - offsets->saved_args;
17070 case THUMB_HARD_FRAME_POINTER_REGNUM:
17071 return offsets->locals_base - offsets->saved_args;
17074 gcc_unreachable ();
17078 case FRAME_POINTER_REGNUM:
17081 case STACK_POINTER_REGNUM:
17082 return offsets->outgoing_args - offsets->soft_frame;
17084 case ARM_HARD_FRAME_POINTER_REGNUM:
17085 return offsets->saved_regs - offsets->soft_frame;
17087 case THUMB_HARD_FRAME_POINTER_REGNUM:
17088 return offsets->locals_base - offsets->soft_frame;
17091 gcc_unreachable ();
17096 gcc_unreachable ();
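/* A purely illustrative numeric example: with saved_args == 0,
   saved_regs == 16, soft_frame == 24 and outgoing_args == 40,
   eliminating ARG_POINTER_REGNUM to STACK_POINTER_REGNUM yields 40,
   while eliminating FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM
   yields 40 - 24 == 16.  */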
17100 /* Generate the rest of a function's prologue. */
17102 thumb1_expand_prologue (void)
17106 HOST_WIDE_INT amount;
17107 arm_stack_offsets *offsets;
17108 unsigned long func_type;
17110 unsigned long live_regs_mask;
17112 func_type = arm_current_func_type ();
17114 /* Naked functions don't have prologues. */
17115 if (IS_NAKED (func_type))
17118 if (IS_INTERRUPT (func_type))
17120 error ("interrupt Service Routines cannot be coded in Thumb mode");
17124 offsets = arm_get_frame_offsets ();
17125 live_regs_mask = offsets->saved_regs_mask;
17126 /* Load the pic register before setting the frame pointer,
17127 so we can use r7 as a temporary work register. */
17128 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17129 arm_load_pic_register (live_regs_mask);
17131 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17132 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17133 stack_pointer_rtx);
17135 amount = offsets->outgoing_args - offsets->saved_regs;
17140 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17141 GEN_INT (- amount)));
17142 RTX_FRAME_RELATED_P (insn) = 1;
17148 /* The stack decrement is too big for an immediate value in a single
17149 insn. In theory we could issue multiple subtracts, but after
17150 three of them it becomes more space efficient to place the full
17151 value in the constant pool and load into a register. (Also the
17152 ARM debugger really likes to see only one stack decrement per
17153 function). So instead we look for a scratch register into which
17154 we can load the decrement, and then we subtract this from the
17155 stack pointer. Unfortunately on the thumb the only available
17156 scratch registers are the argument registers, and we cannot use
17157 these as they may hold arguments to the function. Instead we
17158 attempt to locate a call preserved register which is used by this
17159 function. If we can find one, then we know that it will have
17160 been pushed at the start of the prologue and so we can corrupt it now.  */
17162 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17163 if (live_regs_mask & (1 << regno))
17166 gcc_assert (regno <= LAST_LO_REGNUM);
17168 reg = gen_rtx_REG (SImode, regno);
17170 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17172 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17173 stack_pointer_rtx, reg));
17174 RTX_FRAME_RELATED_P (insn) = 1;
17175 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17176 plus_constant (stack_pointer_rtx,
17178 RTX_FRAME_RELATED_P (dwarf) = 1;
17180 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
17185 if (frame_pointer_needed)
17186 thumb_set_frame_pointer (offsets);
17188 /* If we are profiling, make sure no instructions are scheduled before
17189 the call to mcount. Similarly if the user has requested no
17190 scheduling in the prolog. Similarly if we want non-call exceptions
17191 using the EABI unwinder, to prevent faulting instructions from being
17192 swapped with a stack adjustment. */
17193 if (crtl->profile || !TARGET_SCHED_PROLOG
17194 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17195 emit_insn (gen_blockage ());
17197 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17198 if (live_regs_mask & 0xff)
17199 cfun->machine->lr_save_eliminated = 0;
17204 thumb1_expand_epilogue (void)
17206 HOST_WIDE_INT amount;
17207 arm_stack_offsets *offsets;
17210 /* Naked functions don't have epilogues.  */
17211 if (IS_NAKED (arm_current_func_type ()))
17214 offsets = arm_get_frame_offsets ();
17215 amount = offsets->outgoing_args - offsets->saved_regs;
17217 if (frame_pointer_needed)
17219 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17220 amount = offsets->locals_base - offsets->saved_regs;
17223 gcc_assert (amount >= 0);
17227 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17228 GEN_INT (amount)));
17231 /* r3 is always free in the epilogue. */
17232 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17234 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17235 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17239 /* Emit a USE (stack_pointer_rtx), so that
17240 the stack adjustment will not be deleted. */
17241 emit_insn (gen_prologue_use (stack_pointer_rtx));
17243 if (crtl->profile || !TARGET_SCHED_PROLOG)
17244 emit_insn (gen_blockage ());
17246 /* Emit a clobber for each register that will be restored in the epilogue,
17247 so that flow2 will get register lifetimes correct. */
17248 for (regno = 0; regno < 13; regno++)
17249 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17250 emit_clobber (gen_rtx_REG (SImode, regno));
17252 if (! df_regs_ever_live_p (LR_REGNUM))
17253 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
17257 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17259 arm_stack_offsets *offsets;
17260 unsigned long live_regs_mask = 0;
17261 unsigned long l_mask;
17262 unsigned high_regs_pushed = 0;
17263 int cfa_offset = 0;
17266 if (IS_NAKED (arm_current_func_type ()))
17269 if (is_called_in_ARM_mode (current_function_decl))
17273 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17274 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17276 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17278 /* Generate code sequence to switch us into Thumb mode. */
17279 /* The .code 32 directive has already been emitted by
17280 ASM_DECLARE_FUNCTION_NAME. */
17281 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17282 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17284 /* Generate a label, so that the debugger will notice the
17285 change in instruction sets. This label is also used by
17286 the assembler to bypass the ARM code when this function
17287 is called from a Thumb encoded function elsewhere in the
17288 same file. Hence the definition of STUB_NAME here must
17289 agree with the definition in gas/config/tc-arm.c. */
17291 #define STUB_NAME ".real_start_of"
17293 fprintf (f, "\t.code\t16\n");
17295 if (arm_dllexport_name_p (name))
17296 name = arm_strip_name_encoding (name);
17298 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17299 fprintf (f, "\t.thumb_func\n");
17300 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17303 if (crtl->args.pretend_args_size)
17305 /* Output unwind directive for the stack adjustment. */
17306 if (ARM_EABI_UNWIND_TABLES)
17307 fprintf (f, "\t.pad #%d\n",
17308 crtl->args.pretend_args_size);
17310 if (cfun->machine->uses_anonymous_args)
17314 fprintf (f, "\tpush\t{");
17316 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
17318 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17319 regno <= LAST_ARG_REGNUM;
17321 asm_fprintf (f, "%r%s", regno,
17322 regno == LAST_ARG_REGNUM ? "" : ", ");
17324 fprintf (f, "}\n");
17327 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17328 SP_REGNUM, SP_REGNUM,
17329 crtl->args.pretend_args_size);
17331 /* We don't need to record the stores for unwinding (would it
17332 help the debugger any if we did?), but record the change in
17333 the stack pointer. */
17334 if (dwarf2out_do_frame ())
17336 char *l = dwarf2out_cfi_label ();
17338 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
17339 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17343 /* Get the registers we are going to push. */
17344 offsets = arm_get_frame_offsets ();
17345 live_regs_mask = offsets->saved_regs_mask;
17346 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17347 l_mask = live_regs_mask & 0x40ff;
17348 /* Then count how many other high registers will need to be pushed. */
17349 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
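/* Illustrative, standalone sketch (ours, compiled on its own; not
   GCC's bit_count): how a saved-register mask splits into the low
   registers/LR that a Thumb-1 PUSH can name directly (bits 0-7 and
   bit 14, mask 0x40ff) and the high registers r8-r11 (mask 0x0f00)
   that must be staged through a work register.  */
#include <stdio.h>

static unsigned
popcount_demo (unsigned long x)
{
  unsigned n = 0;
  for (; x != 0; x >>= 1)
    n += x & 1;
  return n;
}

int
main (void)
{
  unsigned long live = (1UL << 4) | (1UL << 7) | (1UL << 9) | (1UL << 14);
  unsigned long l_mask = live & 0x40ff;            /* r4, r7 and lr */
  unsigned high = popcount_demo (live & 0x0f00);   /* r9: one high push */

  printf ("pushable mask 0x%04lx, %u high register(s)\n", l_mask, high);
  return 0;
}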
17351 if (TARGET_BACKTRACE)
17354 unsigned work_register;
17356 /* We have been asked to create a stack backtrace structure.
17357 The code looks like this:
17361 0 sub SP, #16 Reserve space for 4 registers.
17362 2 push {R7} Push low registers.
17363 4 add R7, SP, #20 Get the stack pointer before the push.
17364 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17365 8 mov R7, PC Get hold of the start of this code plus 12.
17366 10 str R7, [SP, #16] Store it.
17367 12 mov R7, FP Get hold of the current frame pointer.
17368 14 str R7, [SP, #4] Store it.
17369 16 mov R7, LR Get hold of the current return address.
17370 18 str R7, [SP, #12] Store it.
17371 20 add R7, SP, #16 Point at the start of the backtrace structure.
17372 22 mov FP, R7 Put this value into the frame pointer. */
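/* For illustration only (ours, not part of any ABI header): the
   16-byte record built by the sequence above, viewed as a struct at
   the address left in the frame pointer, lowest address first.  FP
   ends up pointing at the code_ptr slot, so saved_lr sits at
   [FP, #-4], saved_sp at [FP, #-8] and saved_fp at [FP, #-12].  */
struct thumb_backtrace_record_demo
{
  unsigned long saved_fp;   /* caller's frame pointer */
  unsigned long saved_sp;   /* stack pointer before the structure */
  unsigned long saved_lr;   /* return address */
  unsigned long code_ptr;   /* start of the creation code + 12 */
};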
17374 work_register = thumb_find_work_register (live_regs_mask);
17376 if (ARM_EABI_UNWIND_TABLES)
17377 asm_fprintf (f, "\t.pad #16\n");
17379 asm_fprintf
17380 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17381 SP_REGNUM, SP_REGNUM);
17383 if (dwarf2out_do_frame ())
17385 char *l = dwarf2out_cfi_label ();
17387 cfa_offset = cfa_offset + 16;
17388 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17393 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17394 offset = bit_count (l_mask) * UNITS_PER_WORD;
17399 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17400 offset + 16 + crtl->args.pretend_args_size);
17402 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17405 /* Make sure that the instruction fetching the PC is in the right place
17406 to calculate "start of backtrace creation code + 12". */
17409 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17410 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17412 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17413 ARM_HARD_FRAME_POINTER_REGNUM);
17414 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17419 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17420 ARM_HARD_FRAME_POINTER_REGNUM);
17421 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17423 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17424 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17428 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17429 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17431 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17433 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17434 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17436 /* Optimization: If we are not pushing any low registers but we are going
17437 to push some high registers then delay our first push. This will just
17438 be a push of LR and we can combine it with the push of the first high
17439 register. */
17440 else if ((l_mask & 0xff) != 0
17441 || (high_regs_pushed == 0 && l_mask))
17442 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17444 if (high_regs_pushed)
17446 unsigned pushable_regs;
17447 unsigned next_hi_reg;
17449 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17450 if (live_regs_mask & (1 << next_hi_reg))
17453 pushable_regs = l_mask & 0xff;
17455 if (pushable_regs == 0)
17456 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17458 while (high_regs_pushed > 0)
17460 unsigned long real_regs_mask = 0;
17462 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17464 if (pushable_regs & (1 << regno))
17466 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17468 high_regs_pushed --;
17469 real_regs_mask |= (1 << next_hi_reg);
17471 if (high_regs_pushed)
17473 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17475 if (live_regs_mask & (1 << next_hi_reg))
17480 pushable_regs &= ~((1 << regno) - 1);
17486 /* If we had to find a work register and we have not yet
17487 saved the LR then add it to the list of regs to push. */
17488 if (l_mask == (1 << LR_REGNUM))
17490 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17492 real_regs_mask | (1 << LR_REGNUM));
17496 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17501 /* Handle the case of a double word load into a low register from
17502 a computed memory address. The computed address may involve a
17503 register which is overwritten by the load. */
17505 thumb_load_double_from_address (rtx *operands)
17513 gcc_assert (GET_CODE (operands[0]) == REG);
17514 gcc_assert (GET_CODE (operands[1]) == MEM);
17516 /* Get the memory address. */
17517 addr = XEXP (operands[1], 0);
17519 /* Work out how the memory address is computed. */
17520 switch (GET_CODE (addr))
17523 operands[2] = adjust_address (operands[1], SImode, 4);
17525 if (REGNO (operands[0]) == REGNO (addr))
17527 output_asm_insn ("ldr\t%H0, %2", operands);
17528 output_asm_insn ("ldr\t%0, %1", operands);
17532 output_asm_insn ("ldr\t%0, %1", operands);
17533 output_asm_insn ("ldr\t%H0, %2", operands);
17538 /* Compute <address> + 4 for the high order load. */
17539 operands[2] = adjust_address (operands[1], SImode, 4);
17541 output_asm_insn ("ldr\t%0, %1", operands);
17542 output_asm_insn ("ldr\t%H0, %2", operands);
17546 arg1 = XEXP (addr, 0);
17547 arg2 = XEXP (addr, 1);
17549 if (CONSTANT_P (arg1))
17550 base = arg2, offset = arg1;
17552 base = arg1, offset = arg2;
17554 gcc_assert (GET_CODE (base) == REG);
17556 /* Catch the case of <address> = <reg> + <reg> */
17557 if (GET_CODE (offset) == REG)
17559 int reg_offset = REGNO (offset);
17560 int reg_base = REGNO (base);
17561 int reg_dest = REGNO (operands[0]);
17563 /* Add the base and offset registers together into the
17564 higher destination register. */
17565 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17566 reg_dest + 1, reg_base, reg_offset);
17568 /* Load the lower destination register from the address in
17569 the higher destination register. */
17570 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17571 reg_dest, reg_dest + 1);
17573 /* Load the higher destination register from its own address
17574 plus 4. */
17575 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17576 reg_dest + 1, reg_dest + 1);
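/* Worked example (ours): with destination r0:r1, base r2 and index r3
   the sequence above emits

       add  r1, r2, r3
       ldr  r0, [r1, #0]
       ldr  r1, [r1, #4]

   so the computed address lives in the high destination register and
   is only overwritten by the final load.  */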
17580 /* Compute <address> + 4 for the high order load. */
17581 operands[2] = adjust_address (operands[1], SImode, 4);
17583 /* If the computed address is held in the low order register
17584 then load the high order register first, otherwise always
17585 load the low order register first. */
17586 if (REGNO (operands[0]) == REGNO (base))
17588 output_asm_insn ("ldr\t%H0, %2", operands);
17589 output_asm_insn ("ldr\t%0, %1", operands);
17593 output_asm_insn ("ldr\t%0, %1", operands);
17594 output_asm_insn ("ldr\t%H0, %2", operands);
17600 /* With no registers to worry about we can just load the value
17601 directly. */
17602 operands[2] = adjust_address (operands[1], SImode, 4);
17604 output_asm_insn ("ldr\t%H0, %2", operands);
17605 output_asm_insn ("ldr\t%0, %1", operands);
17609 gcc_unreachable ();
17616 thumb_output_move_mem_multiple (int n, rtx *operands)
17623 if (REGNO (operands[4]) > REGNO (operands[5]))
17626 { rtx tmp = operands[4]; operands[4] = operands[5]; operands[5] = tmp; }
17629 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17630 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17634 if (REGNO (operands[4]) > REGNO (operands[5]))
17637 { rtx tmp = operands[4]; operands[4] = operands[5]; operands[5] = tmp; }
17640 if (REGNO (operands[5]) > REGNO (operands[6]))
17643 { rtx tmp = operands[5]; operands[5] = operands[6]; operands[6] = tmp; }
17646 if (REGNO (operands[4]) > REGNO (operands[5]))
17649 { rtx tmp = operands[4]; operands[4] = operands[5]; operands[5] = tmp; }
17653 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17654 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17658 gcc_unreachable ();
17664 /* Output a call-via instruction for thumb state. */
17666 thumb_call_via_reg (rtx reg)
17668 int regno = REGNO (reg);
17671 gcc_assert (regno < LR_REGNUM);
17673 /* If we are in the normal text section we can use a single instance
17674 per compilation unit. If we are doing function sections, then we need
17675 an entry per section, since we can't rely on reachability. */
17676 if (in_section == text_section)
17678 thumb_call_reg_needed = 1;
17680 if (thumb_call_via_label[regno] == NULL)
17681 thumb_call_via_label[regno] = gen_label_rtx ();
17682 labelp = thumb_call_via_label + regno;
17686 if (cfun->machine->call_via[regno] == NULL)
17687 cfun->machine->call_via[regno] = gen_label_rtx ();
17688 labelp = cfun->machine->call_via + regno;
17691 output_asm_insn ("bl\t%a0", labelp);
17695 /* Routines for generating rtl. */
17697 thumb_expand_movmemqi (rtx *operands)
17699 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17700 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17701 HOST_WIDE_INT len = INTVAL (operands[2]);
17702 HOST_WIDE_INT offset = 0;
17706 emit_insn (gen_movmem12b (out, in, out, in));
17712 emit_insn (gen_movmem8b (out, in, out, in));
17718 rtx reg = gen_reg_rtx (SImode);
17719 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17720 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17727 rtx reg = gen_reg_rtx (HImode);
17728 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17729 plus_constant (in, offset))));
17730 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17731 reg));
17738 rtx reg = gen_reg_rtx (QImode);
17739 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17740 plus_constant (in, offset))));
17741 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17742 reg));
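/* Standalone sketch (ours) of the block-move decomposition above:
   LEN bytes are lowered to 12-byte groups, one optional 8-byte group,
   then a word, a halfword and a byte as needed.  Compiled on its own,
   this prints the 12 + 8 + 2 + 1 split chosen for LEN == 23.  */
#include <stdio.h>

int
main (void)
{
  long len = 23;

  while (len >= 12) { printf ("12-byte block move\n"); len -= 12; }
  if (len >= 8)  { printf ("8-byte block move\n"); len -= 8; }
  if (len >= 4)  { printf ("word move\n");     len -= 4; }
  if (len >= 2)  { printf ("halfword move\n"); len -= 2; }
  if (len >= 1)  printf ("byte move\n");
  return 0;
}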
17747 thumb_reload_out_hi (rtx *operands)
17749 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17752 /* Handle reading a half-word from memory during reload. */
17754 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17756 gcc_unreachable ();
17759 /* Return the length of a function name prefix
17760 that starts with the character 'c'. */
17762 arm_get_strip_length (int c)
17766 ARM_NAME_ENCODING_LENGTHS
17771 /* Return a pointer to a function's name with any
17772 and all prefix encodings stripped from it. */
17774 arm_strip_name_encoding (const char *name)
17778 while ((skip = arm_get_strip_length (* name)))
17784 /* If there is a '*' anywhere in the name's prefix, then
17785 emit the stripped name verbatim, otherwise prepend an
17786 underscore if leading underscores are being used. */
17788 arm_asm_output_labelref (FILE *stream, const char *name)
17793 while ((skip = arm_get_strip_length (* name)))
17795 verbatim |= (*name == '*');
17800 fputs (name, stream);
17802 asm_fprintf (stream, "%U%s", name);
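/* Minimal standalone sketch (ours) of the prefix-stripping scheme
   used by arm_strip_name_encoding and arm_asm_output_labelref above:
   skip recognized single-character encodings and remember whether a
   '*' (emit-verbatim marker) was seen.  The prefix set recognized
   here is hypothetical; the real lengths come from
   ARM_NAME_ENCODING_LENGTHS.  */
#include <stdio.h>

static int
strip_length_demo (int c)
{
  return (c == '*' || c == '@') ? 1 : 0;   /* hypothetical encodings */
}

int
main (void)
{
  const char *name = "*@foo";
  int verbatim = 0, skip;

  while ((skip = strip_length_demo (*name)) != 0)
    {
      verbatim |= (*name == '*');
      name += skip;
    }
  printf ("%s%s\n", verbatim ? "" : "_", name);   /* prints "foo" */
  return 0;
}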
17806 arm_file_start (void)
17810 if (TARGET_UNIFIED_ASM)
17811 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17815 const char *fpu_name;
17816 if (arm_select[0].string)
17817 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17818 else if (arm_select[1].string)
17819 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17821 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17822 all_cores[arm_default_cpu].name);
17824 if (TARGET_SOFT_FLOAT)
17827 fpu_name = "softvfp";
17829 fpu_name = "softfpa";
17833 int set_float_abi_attributes = 0;
17834 switch (arm_fpu_arch)
17839 case FPUTYPE_FPA_EMU2:
17840 fpu_name = "fpe2";
17841 break;
17842 case FPUTYPE_FPA_EMU3:
17843 fpu_name = "fpe3";
17844 break;
17845 case FPUTYPE_MAVERICK:
17846 fpu_name = "maverick";
17847 break;
17848 case FPUTYPE_VFP:
17849 fpu_name = "vfp";
17850 set_float_abi_attributes = 1;
17851 break;
17852 case FPUTYPE_VFP3D16:
17853 fpu_name = "vfpv3-d16";
17854 set_float_abi_attributes = 1;
17855 break;
17856 case FPUTYPE_VFP3:
17857 fpu_name = "vfpv3";
17858 set_float_abi_attributes = 1;
17859 break;
17860 case FPUTYPE_NEON:
17861 fpu_name = "neon";
17862 set_float_abi_attributes = 1;
17863 break;
17867 if (set_float_abi_attributes)
17869 if (TARGET_HARD_FLOAT)
17870 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17871 if (TARGET_HARD_FLOAT_ABI)
17872 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17875 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17877 /* Some of these attributes only apply when the corresponding features
17878 are used. However we don't have any easy way of figuring this out.
17879 Conservatively record the setting that would have been used. */
17881 /* Tag_ABI_FP_rounding. */
17882 if (flag_rounding_math)
17883 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17884 if (!flag_unsafe_math_optimizations)
17886 /* Tag_ABI_FP_denormal. */
17887 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17888 /* Tag_ABI_FP_exceptions. */
17889 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17891 /* Tag_ABI_FP_user_exceptions. */
17892 if (flag_signaling_nans)
17893 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17894 /* Tag_ABI_FP_number_model. */
17895 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17896 flag_finite_math_only ? 1 : 3);
17898 /* Tag_ABI_align8_needed. */
17899 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17900 /* Tag_ABI_align8_preserved. */
17901 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17902 /* Tag_ABI_enum_size. */
17903 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17904 flag_short_enums ? 1 : 2);
17906 /* Tag_ABI_optimization_goals. */
17907 if (optimize_size)
17908 val = 4;
17909 else if (optimize >= 2)
17910 val = 2;
17911 else if (optimize)
17912 val = 1;
17913 else
17914 val = 6;
17915 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
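/* Standalone sketch (ours) of the flag-to-attribute mapping above,
   compiled on its own; the tag numbers are the Tag_ABI_* values used
   by the code and the flag variables stand in for GCC's globals.  */
#include <stdio.h>

int
main (void)
{
  int flag_rounding_math = 0, flag_unsafe_math_optimizations = 0;
  int flag_signaling_nans = 0, flag_finite_math_only = 0;
  int flag_short_enums = 0;

  if (flag_rounding_math)
    printf ("\t.eabi_attribute 19, 1\n");   /* Tag_ABI_FP_rounding */
  if (!flag_unsafe_math_optimizations)
    {
      printf ("\t.eabi_attribute 20, 1\n"); /* Tag_ABI_FP_denormal */
      printf ("\t.eabi_attribute 21, 1\n"); /* Tag_ABI_FP_exceptions */
    }
  if (flag_signaling_nans)
    printf ("\t.eabi_attribute 22, 1\n");   /* Tag_ABI_FP_user_exceptions */
  printf ("\t.eabi_attribute 23, %d\n",     /* Tag_ABI_FP_number_model */
          flag_finite_math_only ? 1 : 3);
  printf ("\t.eabi_attribute 26, %d\n",     /* Tag_ABI_enum_size */
          flag_short_enums ? 1 : 2);
  return 0;
}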
17917 if (arm_lang_output_object_attributes_hook)
17918 arm_lang_output_object_attributes_hook();
17920 default_file_start();
17924 arm_file_end (void)
17928 if (NEED_INDICATE_EXEC_STACK)
17929 /* Add .note.GNU-stack. */
17930 file_end_indicate_exec_stack ();
17932 if (! thumb_call_reg_needed)
17935 switch_to_section (text_section);
17936 asm_fprintf (asm_out_file, "\t.code 16\n");
17937 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17939 for (regno = 0; regno < LR_REGNUM; regno++)
17941 rtx label = thumb_call_via_label[regno];
17945 targetm.asm_out.internal_label (asm_out_file, "L",
17946 CODE_LABEL_NUMBER (label));
17947 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17953 /* Symbols in the text segment can be accessed without indirecting via the
17954 constant pool; it may take an extra binary operation, but this is still
17955 faster than indirecting via memory. Don't do this when not optimizing,
17956 since we won't be calculating all of the offsets necessary to do this
17957 simplification. */
17960 arm_encode_section_info (tree decl, rtx rtl, int first)
17962 if (optimize > 0 && TREE_CONSTANT (decl))
17963 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17965 default_encode_section_info (decl, rtl, first);
17967 #endif /* !ARM_PE */
17970 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17972 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17973 && !strcmp (prefix, "L"))
17975 arm_ccfsm_state = 0;
17976 arm_target_insn = NULL;
17978 default_internal_label (stream, prefix, labelno);
17981 /* Output code to add DELTA to the first argument, and then jump
17982 to FUNCTION. Used for C++ multiple inheritance. */
17984 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17985 HOST_WIDE_INT delta,
17986 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17989 static int thunk_label = 0;
17992 int mi_delta = delta;
17993 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17995 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17998 mi_delta = - mi_delta;
18002 int labelno = thunk_label++;
18003 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
18004 /* Thunks are entered in ARM mode when available. */
18005 if (TARGET_THUMB1_ONLY)
18007 /* push r3 so we can use it as a temporary. */
18008 /* TODO: Omit this save if r3 is not used. */
18009 fputs ("\tpush {r3}\n", file);
18010 fputs ("\tldr\tr3, ", file);
18014 fputs ("\tldr\tr12, ", file);
18016 assemble_name (file, label);
18017 fputc ('\n', file);
18020 /* If we are generating PIC, the ldr instruction below loads
18021 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
18022 the address of the add + 8, so we have:
18024 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
18025 = target + 1.
18027 Note that we have "+ 1" because some versions of GNU ld
18028 don't set the low bit of the result for R_ARM_REL32
18029 relocations against thumb function symbols.
18030 On ARMv6M this is +4, not +8. */
18031 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
18032 assemble_name (file, labelpc);
18033 fputs (":\n", file);
18034 if (TARGET_THUMB1_ONLY)
18036 /* This is 2 insns after the start of the thunk, so we know it
18037 is 4-byte aligned. */
18038 fputs ("\tadd\tr3, pc, r3\n", file);
18039 fputs ("\tmov r12, r3\n", file);
18042 fputs ("\tadd\tr12, pc, r12\n", file);
18044 else if (TARGET_THUMB1_ONLY)
18045 fputs ("\tmov r12, r3\n", file);
18047 if (TARGET_THUMB1_ONLY)
18049 if (mi_delta > 255)
18051 fputs ("\tldr\tr3, ", file);
18052 assemble_name (file, label);
18053 fputs ("+4\n", file);
18054 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
18055 mi_op, this_regno, this_regno);
18057 else if (mi_delta != 0)
18059 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18060 mi_op, this_regno, this_regno,
18066 /* TODO: Use movw/movt for large constants when available. */
18067 while (mi_delta != 0)
18069 if ((mi_delta & (3 << shift)) == 0)
18070 shift += 2;
18071 else
18072 {
18073 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18074 mi_op, this_regno, this_regno,
18075 mi_delta & (0xff << shift));
18076 mi_delta &= ~(0xff << shift);
18077 shift += 8;
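/* Standalone sketch (ours) of the decomposition loop above: split a
   delta into byte chunks aligned to even bit positions so that each
   piece fits an ARM-style rotated 8-bit immediate.  Compiled on its
   own, this prints adds of 0x45, 0x2300 and 0x10000 for 0x12345.  */
#include <stdio.h>

int
main (void)
{
  unsigned long delta = 0x12345;
  int shift = 0;

  while (delta != 0)
    {
      if ((delta & (3UL << shift)) == 0)
        shift += 2;
      else
        {
          printf ("\tadd\tr0, r0, #%#lx\n", delta & (0xffUL << shift));
          delta &= ~(0xffUL << shift);
          shift += 8;
        }
    }
  return 0;
}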
18083 if (TARGET_THUMB1_ONLY)
18084 fputs ("\tpop\t{r3}\n", file);
18086 fprintf (file, "\tbx\tr12\n");
18087 ASM_OUTPUT_ALIGN (file, 2);
18088 assemble_name (file, label);
18089 fputs (":\n", file);
18092 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
18093 rtx tem = XEXP (DECL_RTL (function), 0);
18094 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
18095 tem = gen_rtx_MINUS (GET_MODE (tem),
18097 gen_rtx_SYMBOL_REF (Pmode,
18098 ggc_strdup (labelpc)));
18099 assemble_integer (tem, 4, BITS_PER_WORD, 1);
18102 /* Output ".word .LTHUNKn". */
18103 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
18105 if (TARGET_THUMB1_ONLY && mi_delta > 255)
18106 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
18110 fputs ("\tb\t", file);
18111 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
18112 if (NEED_PLT_RELOC)
18113 fputs ("(PLT)", file);
18114 fputc ('\n', file);
18119 arm_emit_vector_const (FILE *file, rtx x)
18122 const char * pattern;
18124 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18126 switch (GET_MODE (x))
18128 case V2SImode: pattern = "%08x"; break;
18129 case V4HImode: pattern = "%04x"; break;
18130 case V8QImode: pattern = "%02x"; break;
18131 default: gcc_unreachable ();
18134 fprintf (file, "0x");
18135 for (i = CONST_VECTOR_NUNITS (x); i--;)
18139 element = CONST_VECTOR_ELT (x, i);
18140 fprintf (file, pattern, INTVAL (element));
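/* Standalone illustration (ours) of the formatting above: a V4HImode
   constant is printed highest element first, four hex digits per
   element, so {1, 2, 3, 4} comes out as 0x0004000300020001.  */
#include <stdio.h>

int
main (void)
{
  unsigned short elt[4] = { 1, 2, 3, 4 };
  int i;

  printf ("0x");
  for (i = 4; i--;)
    printf ("%04x", elt[i]);
  printf ("\n");
  return 0;
}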
18147 arm_output_load_gr (rtx *operands)
18154 if (GET_CODE (operands [1]) != MEM
18155 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18156 || GET_CODE (reg = XEXP (sum, 0)) != REG
18157 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18158 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18159 return "wldrw%?\t%0, %1";
18161 /* Fix up an out-of-range load of a GR register. */
18162 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18163 wcgr = operands[0];
18165 output_asm_insn ("ldr%?\t%0, %1", operands);
18167 operands[0] = wcgr;
18169 output_asm_insn ("tmcr%?\t%0, %1", operands);
18170 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18175 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18177 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18178 named arg and all anonymous args onto the stack.
18179 XXX I know the prologue shouldn't be pushing registers, but it is faster
18180 that way. */
18183 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18184 enum machine_mode mode,
18187 int second_time ATTRIBUTE_UNUSED)
18189 int nregs = cum->nregs;
18191 && ARM_DOUBLEWORD_ALIGN
18192 && arm_needs_doubleword_align (mode, type))
18195 cfun->machine->uses_anonymous_args = 1;
18196 if (nregs < NUM_ARG_REGS)
18197 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
18200 /* Return nonzero if the CONSUMER instruction (a store) does not need
18201 PRODUCER's value to calculate the address. */
18204 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18206 rtx value = PATTERN (producer);
18207 rtx addr = PATTERN (consumer);
18209 if (GET_CODE (value) == COND_EXEC)
18210 value = COND_EXEC_CODE (value);
18211 if (GET_CODE (value) == PARALLEL)
18212 value = XVECEXP (value, 0, 0);
18213 value = XEXP (value, 0);
18214 if (GET_CODE (addr) == COND_EXEC)
18215 addr = COND_EXEC_CODE (addr);
18216 if (GET_CODE (addr) == PARALLEL)
18217 addr = XVECEXP (addr, 0, 0);
18218 addr = XEXP (addr, 0);
18220 return !reg_overlap_mentioned_p (value, addr);
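/* Worked example (ours): for a producer (set (reg r1) ...) and a
   consumer (set (mem (plus (reg r2) (const_int 4))) (reg r1)), the
   destination MEM does not mention r1, so the store can form its
   address before r1 is ready and the function returns nonzero.  */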
18223 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18224 have an early register shift value or amount dependency on the
18225 result of PRODUCER. */
18228 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18230 rtx value = PATTERN (producer);
18231 rtx op = PATTERN (consumer);
18234 if (GET_CODE (value) == COND_EXEC)
18235 value = COND_EXEC_CODE (value);
18236 if (GET_CODE (value) == PARALLEL)
18237 value = XVECEXP (value, 0, 0);
18238 value = XEXP (value, 0);
18239 if (GET_CODE (op) == COND_EXEC)
18240 op = COND_EXEC_CODE (op);
18241 if (GET_CODE (op) == PARALLEL)
18242 op = XVECEXP (op, 0, 0);
18245 early_op = XEXP (op, 0);
18246 /* This is either an actual independent shift, or a shift applied to
18247 the first operand of another operation. We want the whole shift
18248 operation. */
18249 if (GET_CODE (early_op) == REG)
18250 early_op = op;
18252 return !reg_overlap_mentioned_p (value, early_op);
18255 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18256 have an early register shift value dependency on the result of
18257 PRODUCER. */
18260 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18262 rtx value = PATTERN (producer);
18263 rtx op = PATTERN (consumer);
18266 if (GET_CODE (value) == COND_EXEC)
18267 value = COND_EXEC_CODE (value);
18268 if (GET_CODE (value) == PARALLEL)
18269 value = XVECEXP (value, 0, 0);
18270 value = XEXP (value, 0);
18271 if (GET_CODE (op) == COND_EXEC)
18272 op = COND_EXEC_CODE (op);
18273 if (GET_CODE (op) == PARALLEL)
18274 op = XVECEXP (op, 0, 0);
18277 early_op = XEXP (op, 0);
18279 /* This is either an actual independent shift, or a shift applied to
18280 the first operand of another operation. We want the value being
18281 shifted, in either case. */
18282 if (GET_CODE (early_op) != REG)
18283 early_op = XEXP (early_op, 0);
18285 return !reg_overlap_mentioned_p (value, early_op);
18288 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18289 have an early register mult dependency on the result of
18290 PRODUCER. */
18293 arm_no_early_mul_dep (rtx producer, rtx consumer)
18295 rtx value = PATTERN (producer);
18296 rtx op = PATTERN (consumer);
18298 if (GET_CODE (value) == COND_EXEC)
18299 value = COND_EXEC_CODE (value);
18300 if (GET_CODE (value) == PARALLEL)
18301 value = XVECEXP (value, 0, 0);
18302 value = XEXP (value, 0);
18303 if (GET_CODE (op) == COND_EXEC)
18304 op = COND_EXEC_CODE (op);
18305 if (GET_CODE (op) == PARALLEL)
18306 op = XVECEXP (op, 0, 0);
18309 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
18311 if (GET_CODE (XEXP (op, 0)) == MULT)
18312 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
18314 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
18320 /* We can't rely on the caller doing the proper promotion when
18321 using APCS or ATPCS. */
18324 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18326 return !TARGET_AAPCS_BASED;
18330 /* AAPCS based ABIs use short enums by default. */
18333 arm_default_short_enums (void)
18335 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18339 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18342 arm_align_anon_bitfield (void)
18344 return TARGET_AAPCS_BASED;
18348 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18351 arm_cxx_guard_type (void)
18353 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18356 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18357 has an accumulator dependency on the result of the producer (a
18358 multiplication instruction) and no other dependency on that result. */
18360 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18362 rtx mul = PATTERN (producer);
18363 rtx mac = PATTERN (consumer);
18365 rtx mac_op0, mac_op1, mac_acc;
18367 if (GET_CODE (mul) == COND_EXEC)
18368 mul = COND_EXEC_CODE (mul);
18369 if (GET_CODE (mac) == COND_EXEC)
18370 mac = COND_EXEC_CODE (mac);
18372 /* Check that mul is of the form (set (...) (mult ...))
18373 and mla is of the form (set (...) (plus (mult ...) (...))). */
18374 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18375 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18376 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18379 mul_result = XEXP (mul, 0);
18380 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18381 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18382 mac_acc = XEXP (XEXP (mac, 1), 1);
18384 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18385 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18386 && !reg_overlap_mentioned_p (mul_result, mac_op1));
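/* Worked example (ours): with producer
   (set (reg r0) (mult (reg r1) (reg r2))) and consumer
   (set (reg r4) (plus (mult (reg r5) (reg r6)) (reg r0))), the
   multiply result r0 feeds only the accumulator operand, so this
   returns nonzero; it would return zero if r0 were also r5 or r6.  */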
18390 /* The EABI says test the least significant bit of a guard variable. */
18393 arm_cxx_guard_mask_bit (void)
18395 return TARGET_AAPCS_BASED;
18399 /* The EABI specifies that all array cookies are 8 bytes long. */
18402 arm_get_cookie_size (tree type)
18406 if (!TARGET_AAPCS_BASED)
18407 return default_cxx_get_cookie_size (type);
18409 size = build_int_cst (sizetype, 8);
18414 /* The EABI says that array cookies should also contain the element size. */
18417 arm_cookie_has_size (void)
18419 return TARGET_AAPCS_BASED;
18423 /* The EABI says constructors and destructors should return a pointer to
18424 the object constructed/destroyed. */
18427 arm_cxx_cdtor_returns_this (void)
18429 return TARGET_AAPCS_BASED;
18432 /* The EABI says that an inline function may never be the key
18433 method. */
18436 arm_cxx_key_method_may_be_inline (void)
18438 return !TARGET_AAPCS_BASED;
18442 arm_cxx_determine_class_data_visibility (tree decl)
18444 if (!TARGET_AAPCS_BASED
18445 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
18448 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18449 is exported. However, on systems without dynamic vague linkage,
18450 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18451 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18452 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18454 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18455 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18459 arm_cxx_class_data_always_comdat (void)
18461 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18462 vague linkage if the class has no key function. */
18463 return !TARGET_AAPCS_BASED;
18467 /* The EABI says __aeabi_atexit should be used to register static
18468 destructors. */
18471 arm_cxx_use_aeabi_atexit (void)
18473 return TARGET_AAPCS_BASED;
18478 arm_set_return_address (rtx source, rtx scratch)
18480 arm_stack_offsets *offsets;
18481 HOST_WIDE_INT delta;
18483 unsigned long saved_regs;
18485 offsets = arm_get_frame_offsets ();
18486 saved_regs = offsets->saved_regs_mask;
18488 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18489 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18492 if (frame_pointer_needed)
18493 addr = plus_constant(hard_frame_pointer_rtx, -4);
18496 /* LR will be the first saved register. */
18497 delta = offsets->outgoing_args - (offsets->frame + 4);
18502 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18503 GEN_INT (delta & ~4095)));
18508 addr = stack_pointer_rtx;
18510 addr = plus_constant (addr, delta);
18512 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18518 thumb_set_return_address (rtx source, rtx scratch)
18520 arm_stack_offsets *offsets;
18521 HOST_WIDE_INT delta;
18522 HOST_WIDE_INT limit;
18525 unsigned long mask;
18529 offsets = arm_get_frame_offsets ();
18530 mask = offsets->saved_regs_mask;
18531 if (mask & (1 << LR_REGNUM))
18534 /* Find the saved regs. */
18535 if (frame_pointer_needed)
18537 delta = offsets->soft_frame - offsets->saved_args;
18538 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18544 delta = offsets->outgoing_args - offsets->saved_args;
18547 /* Allow for the stack frame. */
18548 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18550 /* The link register is always the first saved register. */
18553 /* Construct the address. */
18554 addr = gen_rtx_REG (SImode, reg);
18557 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18558 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18562 addr = plus_constant (addr, delta);
18564 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18567 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18570 /* Implements target hook vector_mode_supported_p. */
18572 arm_vector_mode_supported_p (enum machine_mode mode)
18574 /* Neon also supports V2SImode, etc. listed in the clause below. */
18575 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18576 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18579 if ((mode == V2SImode)
18580 || (mode == V4HImode)
18581 || (mode == V8QImode))
18587 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18588 ARM insns and therefore guarantee that the shift count is modulo 256.
18589 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18590 guarantee no particular behavior for out-of-range counts. */
18592 static unsigned HOST_WIDE_INT
18593 arm_shift_truncation_mask (enum machine_mode mode)
18595 return mode == SImode ? 255 : 0;
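/* Worked example (ours) of the guarantee described above: an SImode
   shift by 257 behaves exactly like a shift by 257 & 255 == 1, so
   the optimizers may fold the truncation away; the 0 returned for
   other modes promises nothing about out-of-range counts.  */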
18599 /* Map internal gcc register numbers to DWARF2 register numbers. */
18602 arm_dbx_register_number (unsigned int regno)
18607 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18608 compatibility. The EABI defines them as registers 96-103. */
18609 if (IS_FPA_REGNUM (regno))
18610 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18612 /* FIXME: VFPv3 register numbering. */
18613 if (IS_VFP_REGNUM (regno))
18614 return 64 + regno - FIRST_VFP_REGNUM;
18616 if (IS_IWMMXT_GR_REGNUM (regno))
18617 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18619 if (IS_IWMMXT_REGNUM (regno))
18620 return 112 + regno - FIRST_IWMMXT_REGNUM;
18622 gcc_unreachable ();
18626 #ifdef TARGET_UNWIND_INFO
18627 /* Emit unwind directives for a store-multiple instruction or stack pointer
18628 push during alignment.
18629 These should only ever be generated by the function prologue code, so
18630 expect them to have a particular form. */
18633 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18636 HOST_WIDE_INT offset;
18637 HOST_WIDE_INT nregs;
18643 e = XVECEXP (p, 0, 0);
18644 if (GET_CODE (e) != SET)
18647 /* First insn will adjust the stack pointer. */
18648 if (GET_CODE (e) != SET
18649 || GET_CODE (XEXP (e, 0)) != REG
18650 || REGNO (XEXP (e, 0)) != SP_REGNUM
18651 || GET_CODE (XEXP (e, 1)) != PLUS)
18654 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18655 nregs = XVECLEN (p, 0) - 1;
18657 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18660 /* The function prologue may also push pc, but not annotate it as it is
18661 never restored. We turn this into a stack pointer adjustment. */
18662 if (nregs * 4 == offset - 4)
18664 fprintf (asm_out_file, "\t.pad #4\n");
18668 fprintf (asm_out_file, "\t.save {");
18670 else if (IS_VFP_REGNUM (reg))
18673 fprintf (asm_out_file, "\t.vsave {");
18675 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18677 /* FPA registers are done differently. */
18678 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18682 /* Unknown register type. */
18685 /* If the stack increment doesn't match the size of the saved registers,
18686 something has gone horribly wrong. */
18687 if (offset != nregs * reg_size)
18692 /* The remaining insns will describe the stores. */
18693 for (i = 1; i <= nregs; i++)
18695 /* Expect (set (mem <addr>) (reg)).
18696 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18697 e = XVECEXP (p, 0, i);
18698 if (GET_CODE (e) != SET
18699 || GET_CODE (XEXP (e, 0)) != MEM
18700 || GET_CODE (XEXP (e, 1)) != REG)
18703 reg = REGNO (XEXP (e, 1));
18708 fprintf (asm_out_file, ", ");
18709 /* We can't use %r for vfp because we need to use the
18710 double precision register names. */
18711 if (IS_VFP_REGNUM (reg))
18712 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18714 asm_fprintf (asm_out_file, "%r", reg);
18716 #ifdef ENABLE_CHECKING
18717 /* Check that the addresses are consecutive. */
18718 e = XEXP (XEXP (e, 0), 0);
18719 if (GET_CODE (e) == PLUS)
18721 offset += reg_size;
18722 if (GET_CODE (XEXP (e, 0)) != REG
18723 || REGNO (XEXP (e, 0)) != SP_REGNUM
18724 || GET_CODE (XEXP (e, 1)) != CONST_INT
18725 || offset != INTVAL (XEXP (e, 1)))
18729 || GET_CODE (e) != REG
18730 || REGNO (e) != SP_REGNUM)
18734 fprintf (asm_out_file, "}\n");
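/* Worked example (ours): for a prologue insn push {r4, r5, lr} the
   code above emits

       .save {r4, r5, lr}

   while a push whose extra pc slot only makes room on the stack gets
   a ".pad #4" for that slot before the registers are listed.  */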
18737 /* Emit unwind directives for a SET. */
18740 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18748 switch (GET_CODE (e0))
18751 /* Pushing a single register. */
18752 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18753 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18754 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18757 asm_fprintf (asm_out_file, "\t.save ");
18758 if (IS_VFP_REGNUM (REGNO (e1)))
18759 asm_fprintf(asm_out_file, "{d%d}\n",
18760 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18762 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
18766 if (REGNO (e0) == SP_REGNUM)
18768 /* A stack increment. */
18769 if (GET_CODE (e1) != PLUS
18770 || GET_CODE (XEXP (e1, 0)) != REG
18771 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18772 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18775 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18776 -INTVAL (XEXP (e1, 1)));
18778 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18780 HOST_WIDE_INT offset;
18782 if (GET_CODE (e1) == PLUS)
18784 if (GET_CODE (XEXP (e1, 0)) != REG
18785 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18787 reg = REGNO (XEXP (e1, 0));
18788 offset = INTVAL (XEXP (e1, 1));
18789 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18790 HARD_FRAME_POINTER_REGNUM, reg,
18791 INTVAL (XEXP (e1, 1)));
18793 else if (GET_CODE (e1) == REG)
18796 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18797 HARD_FRAME_POINTER_REGNUM, reg);
18802 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18804 /* Move from sp to reg. */
18805 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18807 else if (GET_CODE (e1) == PLUS
18808 && GET_CODE (XEXP (e1, 0)) == REG
18809 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18810 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18812 /* Set reg to offset from sp. */
18813 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18814 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
18816 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18818 /* Stack pointer save before alignment. */
18820 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18833 /* Emit unwind directives for the given insn. */
18836 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18840 if (!ARM_EABI_UNWIND_TABLES)
18843 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
18844 && (TREE_NOTHROW (current_function_decl)
18845 || crtl->all_throwers_are_sibcalls))
18848 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18851 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18853 pat = XEXP (pat, 0);
18855 pat = PATTERN (insn);
18857 switch (GET_CODE (pat))
18860 arm_unwind_emit_set (asm_out_file, pat);
18864 /* Store multiple. */
18865 arm_unwind_emit_sequence (asm_out_file, pat);
18874 /* Output a reference from a function exception table to the type_info
18875 object X. The EABI specifies that the symbol should be relocated by
18876 an R_ARM_TARGET2 relocation. */
18879 arm_output_ttype (rtx x)
18881 fputs ("\t.word\t", asm_out_file);
18882 output_addr_const (asm_out_file, x);
18883 /* Use special relocations for symbol references. */
18884 if (GET_CODE (x) != CONST_INT)
18885 fputs ("(TARGET2)", asm_out_file);
18886 fputc ('\n', asm_out_file);
18890 #endif /* TARGET_UNWIND_INFO */
18893 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18894 stack alignment. */
18897 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18899 rtx unspec = SET_SRC (pattern);
18900 gcc_assert (GET_CODE (unspec) == UNSPEC);
18904 case UNSPEC_STACK_ALIGN:
18905 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18906 put anything on the stack, so hopefully it won't matter.
18907 CFA = SP will be correct after alignment. */
18908 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18909 SET_DEST (pattern));
18912 gcc_unreachable ();
18917 /* Output unwind directives for the start/end of a function. */
18920 arm_output_fn_unwind (FILE * f, bool prologue)
18922 if (!ARM_EABI_UNWIND_TABLES)
18926 fputs ("\t.fnstart\n", f);
18929 /* If this function will never be unwound, then mark it as such.
18930 The same condition is used in arm_unwind_emit to suppress
18931 the frame annotations. */
18932 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
18933 && (TREE_NOTHROW (current_function_decl)
18934 || crtl->all_throwers_are_sibcalls))
18935 fputs("\t.cantunwind\n", f);
18937 fputs ("\t.fnend\n", f);
18942 arm_emit_tls_decoration (FILE *fp, rtx x)
18944 enum tls_reloc reloc;
18947 val = XVECEXP (x, 0, 0);
18948 reloc = INTVAL (XVECEXP (x, 0, 1));
18950 output_addr_const (fp, val);
18955 fputs ("(tlsgd)", fp);
18958 fputs ("(tlsldm)", fp);
18961 fputs ("(tlsldo)", fp);
18964 fputs ("(gottpoff)", fp);
18967 fputs ("(tpoff)", fp);
18970 gcc_unreachable ();
18978 fputs (" + (. - ", fp);
18979 output_addr_const (fp, XVECEXP (x, 0, 2));
18981 output_addr_const (fp, XVECEXP (x, 0, 3));
18991 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18994 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18996 gcc_assert (size == 4);
18997 fputs ("\t.word\t", file);
18998 output_addr_const (file, x);
18999 fputs ("(tlsldo)", file);
19003 arm_output_addr_const_extra (FILE *fp, rtx x)
19005 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
19006 return arm_emit_tls_decoration (fp, x);
19007 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
19010 int labelno = INTVAL (XVECEXP (x, 0, 0));
19012 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
19013 assemble_name_raw (fp, label);
19017 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
19019 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
19023 output_addr_const (fp, XVECEXP (x, 0, 0));
19027 else if (GET_CODE (x) == CONST_VECTOR)
19028 return arm_emit_vector_const (fp, x);
19033 /* Output assembly for a shift instruction.
19034 SET_FLAGS determines how the instruction modifies the condition codes.
19035 0 - Do not set condition codes.
19036 1 - Set condition codes.
19037 2 - Use smallest instruction. */
19039 arm_output_shift(rtx * operands, int set_flags)
19042 static const char flag_chars[3] = {'?', '.', '!'};
19047 c = flag_chars[set_flags];
19048 if (TARGET_UNIFIED_ASM)
19050 shift = shift_op(operands[3], &val);
19054 operands[2] = GEN_INT(val);
19055 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
19058 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
19061 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
19062 output_asm_insn (pattern, operands);
19066 /* Output a Thumb-2 casesi instruction. */
19068 thumb2_output_casesi (rtx *operands)
19070 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
19072 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
19074 output_asm_insn ("cmp\t%0, %1", operands);
19075 output_asm_insn ("bhi\t%l3", operands);
19076 switch (GET_MODE(diff_vec))
19079 return "tbb\t[%|pc, %0]";
19081 return "tbh\t[%|pc, %0, lsl #1]";
19085 output_asm_insn ("adr\t%4, %l2", operands);
19086 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
19087 output_asm_insn ("add\t%4, %4, %5", operands);
19092 output_asm_insn ("adr\t%4, %l2", operands);
19093 return "ldr\t%|pc, [%4, %0, lsl #2]";
19096 gcc_unreachable ();
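/* Worked example (ours): for a byte-wide (QImode) dispatch table the
   routine emits

       cmp  r0, r1
       bhi  .Ldefault
       tbb  [pc, r0]

   where each byte entry encodes half the forward offset from the
   table base to its case label.  */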
19100 /* Most ARM cores are single issue, but some newer ones can dual issue.
19101 The scheduler descriptions rely on this being correct. */
19103 arm_issue_rate (void)
19118 /* A table and a function to perform ARM-specific name mangling for
19119 NEON vector types in order to conform to the AAPCS (see "Procedure
19120 Call Standard for the ARM Architecture", Appendix A). To qualify
19121 for emission with the mangled names defined in that document, a
19122 vector type must not only be of the correct mode but also be
19123 composed of NEON vector element types (e.g. __builtin_neon_qi). */
19126 enum machine_mode mode;
19127 const char *element_type_name;
19128 const char *aapcs_name;
19129 } arm_mangle_map_entry;
19131 static arm_mangle_map_entry arm_mangle_map[] = {
19132 /* 64-bit containerized types. */
19133 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
19134 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
19135 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
19136 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
19137 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
19138 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
19139 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
19140 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
19141 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
19142 /* 128-bit containerized types. */
19143 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
19144 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
19145 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
19146 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
19147 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
19148 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
19149 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
19150 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
19151 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
19152 { VOIDmode, NULL, NULL }
19156 arm_mangle_type (const_tree type)
19158 arm_mangle_map_entry *pos = arm_mangle_map;
19160 if (TREE_CODE (type) != VECTOR_TYPE)
19163 /* Check the mode of the vector type, and the name of the vector
19164 element type, against the table. */
19165 while (pos->mode != VOIDmode)
19167 tree elt_type = TREE_TYPE (type);
19169 if (pos->mode == TYPE_MODE (type)
19170 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
19171 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
19172 pos->element_type_name))
19173 return pos->aapcs_name;
19178 /* Use the default mangling for unrecognized (possibly user-defined)
19179 vector types. */
19180 return NULL;
19181 }
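/* Worked example (ours): a C++ function "void f (int8x8_t)", whose
   parameter is the V8QImode NEON type built from __builtin_neon_qi,
   picks up the first table entry above and mangles as
   _Z1f15__simd64_int8_t.  */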
19183 /* Order of allocation of core registers for Thumb: this allocation is
19184 written over the corresponding initial entries of the array
19185 initialized with REG_ALLOC_ORDER. We allocate all low registers
19186 first. Saving and restoring a low register is usually cheaper than
19187 using a call-clobbered high register. */
19189 static const int thumb_core_reg_alloc_order[] =
19191 3, 2, 1, 0, 4, 5, 6, 7,
19192 14, 12, 8, 9, 10, 11, 13, 15
19195 /* Adjust register allocation order when compiling for Thumb. */
19198 arm_order_regs_for_local_alloc (void)
19200 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
19201 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
19203 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
19204 sizeof (thumb_core_reg_alloc_order));
19207 /* Set default optimization options. */
19209 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
19211 /* Enable section anchors by default at -O1 or higher.
19212 Use 2 to distinguish from an explicit -fsection-anchors
19213 given on the command line. */
19214 if (level > 0)
19215 flag_section_anchors = 2;
19218 #include "gt-arm.h"