/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

EXPORTED_CONST struct attribute_spec arm_attribute_table[];

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
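
/* (Editorial note, worked out from the figures above: the block spans
   offsets -4088 through +4095 plus the anchor byte itself, i.e.
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.)  */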

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
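
/* (Illustrative note, not in the original source: these sets compose by
   inclusion.  FL_FOR_ARCH7 strips FL_NOTM from FL_FOR_ARCH6T2, so plain
   "armv7" is Thumb-2 only; FL_FOR_ARCH7A restores FL_NOTM for full
   ARM-state support, while FL_FOR_ARCH7M instead adds FL_DIV, giving
   M-profile cores hardware divide but no ARM-state instructions.)  */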

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
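
/* (Illustrative note: 0xff selects the low registers r0-r7.  The mask
   then removes the Thumb hard frame pointer (r7) and the PIC base
   register (usually r9 or r10); the SP and PC bits lie above bit 7
   anyway, so clearing them is purely defensive.)  */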

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2", iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line,
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string       name            processors  */
  { NULL,       "-mcpu=",       all_cores  },
  { NULL,       "-march=",      all_architectures },
  { NULL,       "-mtune=",      all_cores }
};
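
/* (Illustrative example, not from the original source: given
   "-mcpu=arm926ej-s -mtune=arm1136j-s" on the command line,
   arm_handle_option below stores "arm926ej-s" in arm_select[0].string
   and "arm1136j-s" in arm_select[2].string before arm_override_options
   walks this table.)  */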

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};

/* Available values for -mfpu=.  */
static const struct fpu_desc all_fpus[] =
{
  {"fpa",       FPUTYPE_FPA},
  {"fpe2",      FPUTYPE_FPA_EMU2},
  {"fpe3",      FPUTYPE_FPA_EMU3},
  {"maverick",  FPUTYPE_MAVERICK},
  {"vfp",       FPUTYPE_VFP},
  {"vfp3",      FPUTYPE_VFP3},
  {"vfpv3",     FPUTYPE_VFP3},
  {"vfpv3-d16", FPUTYPE_VFP3D16},
  {"neon",      FPUTYPE_NEON},
  {"neon-fp16", FPUTYPE_NEON_FP16}
};

/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum arm_fp_model fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,         /* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,        /* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3D16  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_NEON  */
  ARM_FP_MODEL_VFP              /* FPUTYPE_NEON_FP16  */
};

struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */
static const struct float_abi all_float_abis[] =
{
  {"soft",      ARM_FLOAT_ABI_SOFT},
  {"softfp",    ARM_FLOAT_ABI_SOFTFP},
  {"hard",      ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */
static const struct fp16_format all_fp16_formats[] =
{
  {"none",              ARM_FP16_FORMAT_NONE},
  {"ieee",              ARM_FP16_FORMAT_IEEE},
  {"alternative",       ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */
static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
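
/* (Worked example, added for illustration: for value = 0b101100 the loop
   computes 0b101100 & 0b101011 = 0b101000, then 0b101000 & 0b100111
   = 0b100000, then 0b100000 & 0b011111 = 0, i.e. one iteration per set
   bit, and returns 3.)  */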

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI format.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
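
  /* (Illustrative C-level view of the convention described above, not a
     declaration from the run-time library's actual headers:

       struct { unsigned quot; unsigned rem; }
         __aeabi_uidivmod (unsigned num, unsigned den);

     with quot returned in r0 and rem in r1, so a caller that only wants
     the quotient can treat the routine as a plain division.)  */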

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
}
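
/* (Illustrative note on the NULL libfuncs above: an HFmode operation
   such as "a + b" on __fp16 values is carried out by widening both
   operands, via __gnu_h2f_ieee when the IEEE format is in use,
   performing the arithmetic in SFmode, and narrowing the result again
   with __gnu_f2h_ieee when it is stored.)  */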

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
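
/* (Added commentary: because the structure tag is part of the ABI, C++
   mangles an AAPCS va_list as "St9__va_list" (see arm_mangle_type),
   which is why the tag created above must be exactly "__va_list".)  */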

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);
          if (value > 0)
            columns = value;
        }
      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_cores[i].name);
      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");
  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_architectures[i].name);
      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
          const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
                /* Set the architecture define.  */
                if (i != ARM_OPT_SET_TUNE)
                  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

                /* Determine the processor core for which we should
                   tune code-generation.  */
                if (/* -mcpu= is a sensible default.  */
                    i == ARM_OPT_SET_CPU
                    /* -mtune= overrides -mcpu= and -march=.  */
                    || i == ARM_OPT_SET_TUNE)
                  arm_tune = (enum processor_type) (sel - ptr->processors);

                /* Remember the CPU associated with this architecture.
                   If no other option is used to set the CPU type,
                   we'll use this to guess the most suitable tuning
                   options.  */
                if (i == ARM_OPT_SET_ARCH)
                  target_arch_cpu = sel->core;

                if (i == ARM_OPT_SET_CPU)
                  selected_cpu = (enum processor_type) (sel - ptr->processors);

                if (i != ARM_OPT_SET_TUNE)
                  {
                    /* If we have been given an architecture and a processor
                       make sure that they are compatible.  We only generate
                       a warning though, and we prefer the CPU over the
                       architecture.  */
                    if (insn_flags != 0 && (insn_flags ^ sel->flags))
                      warning (0, "switch -mcpu=%s conflicts with -march= switch",
                               ptr->string);

                    insn_flags = sel->flags;
                  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
          /* Default to ARM6.  */
          if (selected_cpu == arm_none)
            selected_cpu = arm6;
        }
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          insn_flags = sel->flags;
        }

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
        arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     selected.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
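
  /* (Editorial check of the arithmetic above: 248 + 1 + 4095 = 4344
     = 8 * 543, so Thumb-2 anchor blocks are indeed a multiple of eight
     bytes.)  */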

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
        {
          if (streq (all_fpus[i].name, target_fpu_name))
            {
              arm_fpu_arch = all_fpus[i].fpu;
              arm_fpu_tune = arm_fpu_arch;
              arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
              break;
            }
        }
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
        error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
         if ((insn_flags & FL_VFP) != 0)
           arm_fpu_arch = FPUTYPE_VFP;
         else
      */
      if (arm_arch_cirrus)
        arm_fpu_arch = FPUTYPE_MAVERICK;
      else
        arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
        arm_fpu_tune = FPUTYPE_FPA;
      else
        arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  if (TARGET_AAPCS_BASED
      && (arm_fp_model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
        target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
        target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
        target_thread_pointer = TP_CP15;
      else
        error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
          || (ARM_DOUBLEWORD_ALIGN && size == 64))
        arm_structure_size_boundary = size;
      else
        warning (0, "structure size boundary can only be set to %s",
                 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (selected_cpu == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }

  /* ??? We might want scheduling for thumb2.  */
  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
         2 cycles to load a constant, and the load scheduler may well
         reduce that to 1.  */
      if (arm_ld_sched)
        arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
         to achieve a good schedule, so it's faster to synthesize
         constants that can be done in two insns.  */
      if (arm_tune_xscale)
        arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
1770 /* A table of known ARM exception types.
1771 For use with the interrupt function attribute. */
1773 typedef struct
1774 {
1775 const char *const arg;
1776 const unsigned long return_value;
1777 }
1778 isr_attribute_arg;
1780 static const isr_attribute_arg isr_attribute_args [] =
1781 {
1782 { "IRQ", ARM_FT_ISR },
1783 { "irq", ARM_FT_ISR },
1784 { "FIQ", ARM_FT_FIQ },
1785 { "fiq", ARM_FT_FIQ },
1786 { "ABORT", ARM_FT_ISR },
1787 { "abort", ARM_FT_ISR },
1788 { "ABORT", ARM_FT_ISR },
1789 { "abort", ARM_FT_ISR },
1790 { "UNDEF", ARM_FT_EXCEPTION },
1791 { "undef", ARM_FT_EXCEPTION },
1792 { "SWI", ARM_FT_EXCEPTION },
1793 { "swi", ARM_FT_EXCEPTION },
1794 { NULL, ARM_FT_NORMAL }
1795 };
1797 /* Returns the (interrupt) function type of the current
1798 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1800 static unsigned long
1801 arm_isr_value (tree argument)
1803 const isr_attribute_arg * ptr;
1804 const char * arg;
1806 if (!arm_arch_notm)
1807 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1809 /* No argument - default to IRQ. */
1810 if (argument == NULL_TREE)
1811 return ARM_FT_ISR;
1813 /* Get the value of the argument. */
1814 if (TREE_VALUE (argument) == NULL_TREE
1815 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1816 return ARM_FT_UNKNOWN;
1818 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1820 /* Check it against the list of known arguments. */
1821 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1822 if (streq (arg, ptr->arg))
1823 return ptr->return_value;
1825 /* An unrecognized interrupt type. */
1826 return ARM_FT_UNKNOWN;
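/* Illustrative example, not part of GCC itself: user code that would
   exercise the lookup above.  The attribute's string argument is
   matched against isr_attribute_args, so "IRQ" yields ARM_FT_ISR and
   "FIQ" yields ARM_FT_FIQ.  */
#if 0
void __attribute__ ((interrupt ("IRQ"))) timer_handler (void);
void __attribute__ ((isr ("FIQ"))) fast_handler (void);
#endif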
1829 /* Computes the type of the current function. */
1831 static unsigned long
1832 arm_compute_func_type (void)
1834 unsigned long type = ARM_FT_UNKNOWN;
1838 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1840 /* Decide if the current function is volatile. Such functions
1841 never return, and many memory cycles can be saved by not storing
1842 register values that will never be needed again. This optimization
1843 was added to speed up context switching in a kernel application. */
1844 if (optimize > 0
1845 && (TREE_NOTHROW (current_function_decl)
1846 || !(flag_unwind_tables
1847 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1848 && TREE_THIS_VOLATILE (current_function_decl))
1849 type |= ARM_FT_VOLATILE;
1851 if (cfun->static_chain_decl != NULL)
1852 type |= ARM_FT_NESTED;
1854 attr = DECL_ATTRIBUTES (current_function_decl);
1856 a = lookup_attribute ("naked", attr);
1857 if (a != NULL_TREE)
1858 type |= ARM_FT_NAKED;
1860 a = lookup_attribute ("isr", attr);
1861 if (a == NULL_TREE)
1862 a = lookup_attribute ("interrupt", attr);
1864 if (a == NULL_TREE)
1865 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1866 else
1867 type |= arm_isr_value (TREE_VALUE (a));
1872 /* Returns the type of the current function. */
1875 arm_current_func_type (void)
1877 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1878 cfun->machine->func_type = arm_compute_func_type ();
1880 return cfun->machine->func_type;
1884 arm_allocate_stack_slots_for_args (void)
1886 /* Naked functions should not allocate stack slots for arguments. */
1887 return !IS_NAKED (arm_current_func_type ());
1891 /* Return 1 if it is possible to return using a single instruction.
1892 If SIBLING is non-null, this is a test for a return before a sibling
1893 call. SIBLING is the call insn, so we can examine its register usage. */
1896 use_return_insn (int iscond, rtx sibling)
1899 unsigned int func_type;
1900 unsigned long saved_int_regs;
1901 unsigned HOST_WIDE_INT stack_adjust;
1902 arm_stack_offsets *offsets;
1904 /* Never use a return instruction before reload has run. */
1905 if (!reload_completed)
1908 func_type = arm_current_func_type ();
1910 /* Naked, volatile and stack alignment functions need special
1911 consideration. */
1912 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1915 /* So do interrupt functions that use the frame pointer and Thumb
1916 interrupt functions. */
1917 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1920 offsets = arm_get_frame_offsets ();
1921 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1923 /* As do variadic functions. */
1924 if (crtl->args.pretend_args_size
1925 || cfun->machine->uses_anonymous_args
1926 /* Or if the function calls __builtin_eh_return () */
1927 || crtl->calls_eh_return
1928 /* Or if the function calls alloca */
1929 || cfun->calls_alloca
1930 /* Or if there is a stack adjustment. However, if the stack pointer
1931 is saved on the stack, we can use a pre-incrementing stack load. */
1932 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1933 && stack_adjust == 4)))
1936 saved_int_regs = offsets->saved_regs_mask;
1938 /* Unfortunately, the insn
1940 ldmib sp, {..., sp, ...}
1942 triggers a bug on most SA-110 based devices, such that the stack
1943 pointer won't be correctly restored if the instruction takes a
1944 page fault. We work around this problem by popping r3 along with
1945 the other registers, since that is never slower than executing
1946 another instruction.
1948 We test for !arm_arch5 here, because code for any architecture
1949 earlier than this could potentially be run on one of the buggy
1950 chips. */
1951 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1953 /* Validate that r3 is a call-clobbered register (always true in
1954 the default abi) ... */
1955 if (!call_used_regs[3])
1958 /* ... that it isn't being used for a return value ... */
1959 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1962 /* ... or for a tail-call argument ... */
1965 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1967 if (find_regno_fusage (sibling, USE, 3))
1971 /* ... and that there are no call-saved registers in r0-r2
1972 (always true in the default ABI). */
1973 if (saved_int_regs & 0x7)
1977 /* Can't be done if interworking with Thumb and any registers have been
1978 stacked. */
1979 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1982 /* On StrongARM, conditional returns are expensive if they aren't
1983 taken and multiple registers have been stacked. */
1984 if (iscond && arm_tune_strongarm)
1986 /* Conditional return when just the LR is stored is a simple
1987 conditional-load instruction, that's not expensive. */
1988 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1989 return 0;
1991 if (flag_pic
1992 && arm_pic_register != INVALID_REGNUM
1993 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1997 /* If there are saved registers but the LR isn't saved, then we need
1998 two instructions for the return. */
1999 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2002 /* Can't be done if any of the FPA regs are pushed,
2003 since this also requires an insn. */
2004 if (TARGET_HARD_FLOAT && TARGET_FPA)
2005 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2006 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2009 /* Likewise VFP regs. */
2010 if (TARGET_HARD_FLOAT && TARGET_VFP)
2011 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2012 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2015 if (TARGET_REALLY_IWMMXT)
2016 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2017 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
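/* For illustration: when every test above passes, the epilogue can
   collapse to a single instruction such as "ldmfd sp!, {r4-r7, pc}"
   (registers saved) or "bx lr" / "mov pc, lr" (nothing saved); any
   failed test forces a separate stack adjustment or restore insn
   before the return.  */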
2023 /* Return TRUE if int I is a valid immediate ARM constant. */
2026 const_ok_for_arm (HOST_WIDE_INT i)
2030 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2031 be all zero, or all one. */
2032 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2033 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2034 != ((~(unsigned HOST_WIDE_INT) 0)
2035 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2038 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2040 /* Fast return for 0 and small values. We must do this for zero, since
2041 the code below can't handle that one case. */
2042 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2045 /* Get the number of trailing zeros. */
2046 lowbit = ffs((int) i) - 1;
2048 /* Only even shifts are allowed in ARM mode so round down to the
2049 nearest even number. */
2050 if (TARGET_ARM)
2051 lowbit &= ~1;
2053 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2058 /* Allow rotated constants in ARM mode. */
2059 if (lowbit <= 4
2060 && ((i & ~0xc000003f) == 0
2061 || (i & ~0xf000000f) == 0
2062 || (i & ~0xfc000003) == 0))
2069 /* Allow repeated pattern. */
2070 v = i & 0xff;
2071 v |= v << 16;
2072 if (i == v || i == (v | (v << 8)))
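/* A minimal standalone sketch (illustrative, not the GCC entry point)
   of the test above: an ARM-state immediate is an 8-bit value rotated
   right by an even amount.  */
#if 0
static int
example_arm_immediate_p (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotate X left by ROT; if the result fits in 8 bits, then X is
         that 8-bit value rotated right by ROT.  */
      unsigned int v = (x << rot) | (x >> ((32 - rot) & 31));

      if ((v & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
#endif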
2079 /* Return true if I is a valid constant for the operation CODE. */
2081 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2083 if (const_ok_for_arm (i))
2107 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2109 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2115 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2119 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
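/* Illustration (hypothetical user code): -1 is not itself an encodable
   immediate, but const_ok_for_op accepts it for PLUS because its
   negation, 1, is, so the compiler can emit "sub r0, r0, #1" instead
   of an add.  */
#if 0
int example_decrement (int x) { return x + -1; }
#endif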
2126 /* Emit a sequence of insns to handle a large constant.
2127 CODE is the code of the operation required, it can be any of SET, PLUS,
2128 IOR, AND, XOR, MINUS;
2129 MODE is the mode in which the operation is being performed;
2130 VAL is the integer to operate on;
2131 SOURCE is the other operand (a register, or a null-pointer for SET);
2132 SUBTARGETS means it is safe to create scratch registers if that will
2133 either produce a simpler sequence, or we will want to cse the values.
2134 Return value is the number of insns emitted. */
2136 /* ??? Tweak this for thumb2. */
2138 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2139 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2143 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2144 cond = COND_EXEC_TEST (PATTERN (insn));
2148 if (subtargets || code == SET
2149 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2150 && REGNO (target) != REGNO (source)))
2152 /* After arm_reorg has been called, we can't fix up expensive
2153 constants by pushing them into memory so we must synthesize
2154 them in-line, regardless of the cost. This is only likely to
2155 be more costly on chips that have load delay slots and we are
2156 compiling without running the scheduler (so no splitting
2157 occurred before the final instruction emission).
2159 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2161 if (!after_arm_reorg
2163 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2165 > arm_constant_limit + (code != SET)))
2169 /* Currently SET is the only monadic value for CODE; all
2170 the rest are dyadic. */
2171 if (TARGET_USE_MOVT)
2172 arm_emit_movpair (target, GEN_INT (val));
2174 emit_set_insn (target, GEN_INT (val));
2180 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2182 if (TARGET_USE_MOVT)
2183 arm_emit_movpair (temp, GEN_INT (val));
2185 emit_set_insn (temp, GEN_INT (val));
2187 /* For MINUS, the value is subtracted from, since we never
2188 have subtraction of a constant. */
2190 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2192 emit_set_insn (target,
2193 gen_rtx_fmt_ee (code, mode, source, temp));
2199 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2203 /* Return the number of ARM instructions required to synthesize the given
2204 constant, if we start emitting them from bit-position I. */
2206 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2208 HOST_WIDE_INT temp1;
2216 if (remainder & (3 << (i - 2)))
2221 temp1 = remainder & ((0x0ff << end)
2222 | ((i < end) ? (0xff >> (32 - end)) : 0));
2223 remainder &= ~temp1;
2228 } while (remainder);
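/* A simplified standalone sketch of the chunking idea above
   (illustrative; unlike the real code it ignores wrap-around past bit
   31 and always starts at the lowest set bit): each loop iteration
   peels off one 8-bit chunk at an even bit position, i.e. one
   mov/orr-class instruction.  */
#if 0
static int
example_count_chunks (unsigned int x)
{
  int n = 0;

  while (x)
    {
      /* Lowest set bit, rounded down to an even position because ARM
         immediates rotate by even amounts only.  */
      int low = __builtin_ctz (x) & ~1;

      x &= ~(0xffu << low);
      n++;
    }
  return n;    /* e.g. 0x12340078 needs three chunks: mov + orr + orr */
}
#endif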
2232 /* Emit an instruction with the indicated PATTERN. If COND is
2233 non-NULL, conditionalize the execution of the instruction on COND
2234 being true. */
2237 emit_constant_insn (rtx cond, rtx pattern)
2240 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2241 emit_insn (pattern);
2244 /* As above, but extra parameter GENERATE which, if clear, suppresses
2245 RTL generation. */
2246 /* ??? This needs more work for thumb2. */
2249 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2250 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2255 int can_negate_initial = 0;
2258 int num_bits_set = 0;
2259 int set_sign_bit_copies = 0;
2260 int clear_sign_bit_copies = 0;
2261 int clear_zero_bit_copies = 0;
2262 int set_zero_bit_copies = 0;
2264 unsigned HOST_WIDE_INT temp1, temp2;
2265 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2267 /* Find out which operations are safe for a given CODE. Also do a quick
2268 check for degenerate cases; these can occur when DImode operations
2269 are split. */
2280 can_negate_initial = 1;
2284 if (remainder == 0xffffffff)
2287 emit_constant_insn (cond,
2288 gen_rtx_SET (VOIDmode, target,
2289 GEN_INT (ARM_SIGN_EXTEND (val))));
2295 if (reload_completed && rtx_equal_p (target, source))
2299 emit_constant_insn (cond,
2300 gen_rtx_SET (VOIDmode, target, source));
2312 emit_constant_insn (cond,
2313 gen_rtx_SET (VOIDmode, target, const0_rtx));
2316 if (remainder == 0xffffffff)
2318 if (reload_completed && rtx_equal_p (target, source))
2321 emit_constant_insn (cond,
2322 gen_rtx_SET (VOIDmode, target, source));
2331 if (reload_completed && rtx_equal_p (target, source))
2334 emit_constant_insn (cond,
2335 gen_rtx_SET (VOIDmode, target, source));
2339 /* We don't know how to handle other cases yet. */
2340 gcc_assert (remainder == 0xffffffff);
2343 emit_constant_insn (cond,
2344 gen_rtx_SET (VOIDmode, target,
2345 gen_rtx_NOT (mode, source)));
2349 /* We treat MINUS as (val - source), since (source - val) is always
2350 passed as (source + (-val)). */
2354 emit_constant_insn (cond,
2355 gen_rtx_SET (VOIDmode, target,
2356 gen_rtx_NEG (mode, source)));
2359 if (const_ok_for_arm (val))
2362 emit_constant_insn (cond,
2363 gen_rtx_SET (VOIDmode, target,
2364 gen_rtx_MINUS (mode, GEN_INT (val),
2376 /* If we can do it in one insn, get out quickly. */
2377 if (const_ok_for_arm (val)
2378 || (can_negate_initial && const_ok_for_arm (-val))
2379 || (can_invert && const_ok_for_arm (~val)))
2382 emit_constant_insn (cond,
2383 gen_rtx_SET (VOIDmode, target,
2385 ? gen_rtx_fmt_ee (code, mode, source,
2391 /* Calculate a few attributes that may be useful for specific
2392 optimizations. */
2393 /* Count the number of leading zeros. */
2394 for (i = 31; i >= 0; i--)
2396 if ((remainder & (1 << i)) == 0)
2397 clear_sign_bit_copies++;
2402 /* Count the number of leading ones. */
2403 for (i = 31; i >= 0; i--)
2405 if ((remainder & (1 << i)) != 0)
2406 set_sign_bit_copies++;
2411 /* Count the number of trailing zeros. */
2412 for (i = 0; i <= 31; i++)
2414 if ((remainder & (1 << i)) == 0)
2415 clear_zero_bit_copies++;
2420 /* Count the number of trailing ones. */
2421 for (i = 0; i <= 31; i++)
2423 if ((remainder & (1 << i)) != 0)
2424 set_zero_bit_copies++;
2432 /* See if we can use movw. */
2433 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2436 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2441 /* See if we can do this by sign_extending a constant that is known
2442 to be negative. This is a good way of doing it, since the shift
2443 may well merge into a subsequent insn. */
2444 if (set_sign_bit_copies > 1)
2446 if (const_ok_for_arm
2447 (temp1 = ARM_SIGN_EXTEND (remainder
2448 << (set_sign_bit_copies - 1))))
2452 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2453 emit_constant_insn (cond,
2454 gen_rtx_SET (VOIDmode, new_src,
2456 emit_constant_insn (cond,
2457 gen_ashrsi3 (target, new_src,
2458 GEN_INT (set_sign_bit_copies - 1)));
2462 /* For an inverted constant, we will need to set the low bits;
2463 these will be shifted out of harm's way. */
2464 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2465 if (const_ok_for_arm (~temp1))
2469 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2470 emit_constant_insn (cond,
2471 gen_rtx_SET (VOIDmode, new_src,
2473 emit_constant_insn (cond,
2474 gen_ashrsi3 (target, new_src,
2475 GEN_INT (set_sign_bit_copies - 1)));
2481 /* See if we can calculate the value as the difference between two
2482 valid immediates. */
2483 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2485 int topshift = clear_sign_bit_copies & ~1;
2487 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2488 & (0xff000000 >> topshift));
2490 /* If temp1 is zero, then that means the 9 most significant
2491 bits of remainder were 1 and we've caused it to overflow.
2492 When topshift is 0 we don't need to do anything since we
2493 can borrow from 'bit 32'. */
2494 if (temp1 == 0 && topshift != 0)
2495 temp1 = 0x80000000 >> (topshift - 1);
2497 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2499 if (const_ok_for_arm (temp2))
2503 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2504 emit_constant_insn (cond,
2505 gen_rtx_SET (VOIDmode, new_src,
2507 emit_constant_insn (cond,
2508 gen_addsi3 (target, new_src,
2516 /* See if we can generate this by setting the bottom (or the top)
2517 16 bits, and then shifting these into the other half of the
2518 word. We only look for the simplest cases, to do more would cost
2519 too much. Be careful, however, not to generate this when the
2520 alternative would take fewer insns. */
2521 if (val & 0xffff0000)
2523 temp1 = remainder & 0xffff0000;
2524 temp2 = remainder & 0x0000ffff;
2526 /* Overlaps outside this range are best done using other methods. */
2527 for (i = 9; i < 24; i++)
2529 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2530 && !const_ok_for_arm (temp2))
2532 rtx new_src = (subtargets
2533 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2535 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2536 source, subtargets, generate);
2544 gen_rtx_ASHIFT (mode, source,
2551 /* Don't duplicate cases already considered. */
2552 for (i = 17; i < 24; i++)
2554 if (((temp1 | (temp1 >> i)) == remainder)
2555 && !const_ok_for_arm (temp1))
2557 rtx new_src = (subtargets
2558 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2560 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2561 source, subtargets, generate);
2566 gen_rtx_SET (VOIDmode, target,
2569 gen_rtx_LSHIFTRT (mode, source,
2580 /* If we have IOR or XOR, and the constant can be loaded in a
2581 single instruction, and we can find a temporary to put it in,
2582 then this can be done in two instructions instead of 3-4. */
2584 /* TARGET can't be NULL if SUBTARGETS is 0 */
2585 || (reload_completed && !reg_mentioned_p (target, source)))
2587 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2591 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2593 emit_constant_insn (cond,
2594 gen_rtx_SET (VOIDmode, sub,
2596 emit_constant_insn (cond,
2597 gen_rtx_SET (VOIDmode, target,
2598 gen_rtx_fmt_ee (code, mode,
2609 x = y | constant (which is composed of set_sign_bit_copies leading 1s
2610 followed by 0s, e.g. 0xfff00000)
2611 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2613 This can be done in 2 instructions by using shifts with mov or mvn.
2618 mvn r0, r0, lsr #12 */
2619 if (set_sign_bit_copies > 8
2620 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2624 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2625 rtx shift = GEN_INT (set_sign_bit_copies);
2629 gen_rtx_SET (VOIDmode, sub,
2631 gen_rtx_ASHIFT (mode,
2636 gen_rtx_SET (VOIDmode, target,
2638 gen_rtx_LSHIFTRT (mode, sub,
2645 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2647 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2649 For example, r0 = r0 | 0xfff
2654 if (set_zero_bit_copies > 8
2655 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2659 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2660 rtx shift = GEN_INT (set_zero_bit_copies);
2664 gen_rtx_SET (VOIDmode, sub,
2666 gen_rtx_LSHIFTRT (mode,
2671 gen_rtx_SET (VOIDmode, target,
2673 gen_rtx_ASHIFT (mode, sub,
2679 /* This will never be reached for Thumb2 because orn is a valid
2680 instruction. This is for Thumb1 and the ARM 32 bit cases.
2682 x = y | constant (such that ~constant is a valid constant)
2684 x = ~(~y & ~constant).
2686 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2690 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2691 emit_constant_insn (cond,
2692 gen_rtx_SET (VOIDmode, sub,
2693 gen_rtx_NOT (mode, source)));
2696 sub = gen_reg_rtx (mode);
2697 emit_constant_insn (cond,
2698 gen_rtx_SET (VOIDmode, sub,
2699 gen_rtx_AND (mode, source,
2701 emit_constant_insn (cond,
2702 gen_rtx_SET (VOIDmode, target,
2703 gen_rtx_NOT (mode, sub)));
2710 /* See if two shifts will do two or more insns' worth of work. */
2711 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2713 HOST_WIDE_INT shift_mask = ((0xffffffff
2714 << (32 - clear_sign_bit_copies))
2717 if ((remainder | shift_mask) != 0xffffffff)
2721 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2722 insns = arm_gen_constant (AND, mode, cond,
2723 remainder | shift_mask,
2724 new_src, source, subtargets, 1);
2729 rtx targ = subtargets ? NULL_RTX : target;
2730 insns = arm_gen_constant (AND, mode, cond,
2731 remainder | shift_mask,
2732 targ, source, subtargets, 0);
2738 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2739 rtx shift = GEN_INT (clear_sign_bit_copies);
2741 emit_insn (gen_ashlsi3 (new_src, source, shift));
2742 emit_insn (gen_lshrsi3 (target, new_src, shift));
2748 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2750 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2752 if ((remainder | shift_mask) != 0xffffffff)
2756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2758 insns = arm_gen_constant (AND, mode, cond,
2759 remainder | shift_mask,
2760 new_src, source, subtargets, 1);
2765 rtx targ = subtargets ? NULL_RTX : target;
2767 insns = arm_gen_constant (AND, mode, cond,
2768 remainder | shift_mask,
2769 targ, source, subtargets, 0);
2775 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2776 rtx shift = GEN_INT (clear_zero_bit_copies);
2778 emit_insn (gen_lshrsi3 (new_src, source, shift));
2779 emit_insn (gen_ashlsi3 (target, new_src, shift));
2791 for (i = 0; i < 32; i++)
2792 if (remainder & (1 << i))
2793 num_bits_set++;
2795 if ((code == AND)
2796 || (code != IOR && can_invert && num_bits_set > 16))
2797 remainder = (~remainder) & 0xffffffff;
2798 else if (code == PLUS && num_bits_set > 16)
2799 remainder = (-remainder) & 0xffffffff;
2806 /* Now try to find a way of doing the job in either two or three
2807 instructions.
2808 We start by looking for the largest block of zeros that is aligned on
2809 a 2-bit boundary; we then fill up the temps, wrapping around to the
2810 top of the word when we drop off the bottom.
2811 In the worst case this code should produce no more than four insns.
2812 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2813 best place to start. */
2815 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2821 int best_consecutive_zeros = 0;
2823 for (i = 0; i < 32; i += 2)
2825 int consecutive_zeros = 0;
2827 if (!(remainder & (3 << i)))
2829 while ((i < 32) && !(remainder & (3 << i)))
2831 consecutive_zeros += 2;
2834 if (consecutive_zeros > best_consecutive_zeros)
2836 best_consecutive_zeros = consecutive_zeros;
2837 best_start = i - consecutive_zeros;
2843 /* So long as it won't require any more insns to do so, it's
2844 desirable to emit a small constant (in bits 0...9) in the last
2845 insn. This way there is more chance that it can be combined with
2846 a later addressing insn to form a pre-indexed load or store
2847 operation. Consider:
2849 *((volatile int *)0xe0000100) = 1;
2850 *((volatile int *)0xe0000110) = 2;
2852 We want this to wind up as:
2856 str rB, [rA, #0x100]
2858 str rB, [rA, #0x110]
2860 rather than having to synthesize both large constants from scratch.
2862 Therefore, we calculate how many insns would be required to emit
2863 the constant starting from `best_start', and also starting from
2864 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2865 yield a shorter sequence, we may as well use zero. */
2866 if (best_start != 0
2867 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2868 && (count_insns_for_constant (remainder, 0) <=
2869 count_insns_for_constant (remainder, best_start)))
2873 /* Now start emitting the insns. */
2881 if (remainder & (3 << (i - 2)))
2886 temp1 = remainder & ((0x0ff << end)
2887 | ((i < end) ? (0xff >> (32 - end)) : 0));
2888 remainder &= ~temp1;
2892 rtx new_src, temp1_rtx;
2894 if (code == SET || code == MINUS)
2896 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2897 if (can_invert && code != MINUS)
2902 if (remainder && subtargets)
2903 new_src = gen_reg_rtx (mode);
2908 else if (can_negate)
2912 temp1 = trunc_int_for_mode (temp1, mode);
2913 temp1_rtx = GEN_INT (temp1);
2917 else if (code == MINUS)
2918 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2920 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2922 emit_constant_insn (cond,
2923 gen_rtx_SET (VOIDmode, new_src,
2933 else if (code == MINUS)
2942 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
2943 shifts. */
2955 /* Canonicalize a comparison so that we are more likely to recognize it.
2956 This can be done for a few constant compares, where we can make the
2957 immediate value easier to load. */
2960 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2963 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2964 unsigned HOST_WIDE_INT maxval;
2965 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2976 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2978 *op1 = GEN_INT (i + 1);
2979 return code == GT ? GE : LT;
2986 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2988 *op1 = GEN_INT (i - 1);
2989 return code == GE ? GT : LE;
2995 if (i != ~((unsigned HOST_WIDE_INT) 0)
2996 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2998 *op1 = GEN_INT (i + 1);
2999 return code == GTU ? GEU : LTU;
3006 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3008 *op1 = GEN_INT (i - 1);
3009 return code == GEU ? GTU : LEU;
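/* Worked example of the canonicalization above (hypothetical user
   code, 32-bit int assumed): 0xfff is not a valid ARM immediate but
   0x1000 is, so GT is rewritten as GE with the operand bumped by one
   and the comparison becomes a single cmp.  */
#if 0
int example_cmp (int x) { return x > 0xfff; }  /* becomes x >= 0x1000 */
#endif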
3021 /* Define how to find the value returned by a function. */
3024 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
3026 enum machine_mode mode;
3027 int unsignedp ATTRIBUTE_UNUSED;
3028 rtx r ATTRIBUTE_UNUSED;
3030 mode = TYPE_MODE (type);
3031 /* Promote integer types. */
3032 if (INTEGRAL_TYPE_P (type))
3033 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
3035 /* Promote small structs returned in a register to full-word size
3036 for big-endian AAPCS. */
3037 if (arm_return_in_msb (type))
3039 HOST_WIDE_INT size = int_size_in_bytes (type);
3040 if (size % UNITS_PER_WORD != 0)
3042 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3043 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3047 return LIBCALL_VALUE(mode);
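/* Illustration of the big-endian promotion above (hypothetical type):
   a 3-byte struct returned in r0 is widened to SImode so that its
   bytes land at the most significant end of the register, as AAPCS
   requires on big-endian targets.  */
#if 0
struct example_s3 { char a, b, c; };
extern struct example_s3 example_get3 (void);
#endif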
3050 /* Determine the amount of memory needed to store the possible return
3051 registers of an untyped call. */
3053 arm_apply_result_size (void)
3059 if (TARGET_HARD_FLOAT_ABI)
3063 if (TARGET_MAVERICK)
3066 if (TARGET_IWMMXT_ABI)
3073 /* Decide whether a type should be returned in memory (true)
3074 or in a register (false). This is called as the target hook
3075 TARGET_RETURN_IN_MEMORY. */
3077 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3079 HOST_WIDE_INT size;
3081 size = int_size_in_bytes (type);
3083 /* Vector values should be returned using ARM registers, not memory (unless
3084 they're over 16 bytes, which will break since we only have four
3085 call-clobbered registers to play with). */
3086 if (TREE_CODE (type) == VECTOR_TYPE)
3087 return (size < 0 || size > (4 * UNITS_PER_WORD));
3089 if (!AGGREGATE_TYPE_P (type)
3090 && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
3091 /* All simple types are returned in registers.
3092 For AAPCS, complex types are treated the same as aggregates. */
3095 if (arm_abi != ARM_ABI_APCS)
3097 /* ATPCS and later return aggregate types in memory only if they are
3098 larger than a word (or are variable size). */
3099 return (size < 0 || size > UNITS_PER_WORD);
3102 /* For the arm-wince targets we choose to be compatible with Microsoft's
3103 ARM and Thumb compilers, which always return aggregates in memory. */
3105 /* All structures/unions bigger than one word are returned in memory.
3106 Also catch the case where int_size_in_bytes returns -1. In this case
3107 the aggregate is either huge or of variable size, and in either case
3108 we will want to return it via memory and not in a register. */
3109 if (size < 0 || size > UNITS_PER_WORD)
3112 if (TREE_CODE (type) == RECORD_TYPE)
3116 /* For a struct the APCS says that we only return in a register
3117 if the type is 'integer like' and every addressable element
3118 has an offset of zero. For practical purposes this means
3119 that the structure can have at most one non bit-field element
3120 and that this element must be the first one in the structure. */
3122 /* Find the first field, ignoring non FIELD_DECL things which will
3123 have been created by C++. */
3124 for (field = TYPE_FIELDS (type);
3125 field && TREE_CODE (field) != FIELD_DECL;
3126 field = TREE_CHAIN (field))
3130 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
3132 /* Check that the first field is valid for returning in a register. */
3134 /* ... Floats are not allowed */
3135 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3138 /* ... Aggregates that are not themselves valid for returning in
3139 a register are not allowed. */
3140 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3143 /* Now check the remaining fields, if any. Only bitfields are allowed,
3144 since they are not addressable. */
3145 for (field = TREE_CHAIN (field);
3147 field = TREE_CHAIN (field))
3149 if (TREE_CODE (field) != FIELD_DECL)
3152 if (!DECL_BIT_FIELD_TYPE (field))
3159 if (TREE_CODE (type) == UNION_TYPE)
3163 /* Unions can be returned in registers if every element is
3164 integral, or can be returned in an integer register. */
3165 for (field = TYPE_FIELDS (type);
3167 field = TREE_CHAIN (field))
3169 if (TREE_CODE (field) != FIELD_DECL)
3172 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3175 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3181 #endif /* not ARM_WINCE */
3183 /* Return all other types in memory. */
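/* Hypothetical types illustrating the APCS rules above:  */
#if 0
struct r  { int i; };       /* one word, integer like: returned in r0 */
struct m1 { short a, b; };  /* second addressable field: memory */
struct m2 { float f; };     /* first field is a float: memory */
#endif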
3187 /* Indicate whether or not words of a double are in big-endian order. */
3190 arm_float_words_big_endian (void)
3192 if (TARGET_MAVERICK)
3195 /* For FPA, float words are always big-endian. For VFP, floats words
3196 follow the memory system mode. */
3204 return (TARGET_BIG_END ? 1 : 0);
3209 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3210 for a call to a function whose data type is FNTYPE.
3211 For a library call, FNTYPE is NULL. */
3213 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
3214 rtx libname ATTRIBUTE_UNUSED,
3215 tree fndecl ATTRIBUTE_UNUSED)
3217 /* On the ARM, the offset starts at 0. */
3218 pcum->nregs = 0;
3219 pcum->iwmmxt_nregs = 0;
3220 pcum->can_split = true;
3222 /* Varargs vectors are treated the same as long long.
3223 named_count avoids having to change the way ARM handles 'named' arguments. */
3224 pcum->named_count = 0;
3227 if (TARGET_REALLY_IWMMXT && fntype)
3231 for (fn_arg = TYPE_ARG_TYPES (fntype);
3233 fn_arg = TREE_CHAIN (fn_arg))
3234 pcum->named_count += 1;
3236 if (! pcum->named_count)
3237 pcum->named_count = INT_MAX;
3242 /* Return true if mode/type need doubleword alignment. */
3244 arm_needs_doubleword_align (enum machine_mode mode, tree type)
3246 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
3247 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
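/* Illustration of the even-register rule applied below (hypothetical
   prototype): a 64-bit-aligned argument starts in an even register,
   so the int lands in r0, r1 is skipped, and the long long takes
   r2/r3.  */
#if 0
extern void example_f (int a, long long b);
#endif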
3251 /* Determine where to put an argument to a function.
3252 Value is zero to push the argument on the stack,
3253 or a hard register in which to store the argument.
3255 MODE is the argument's machine mode.
3256 TYPE is the data type of the argument (as a tree).
3257 This is null for libcalls where that information may
3259 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3260 the preceding args and about the function being called.
3261 NAMED is nonzero if this argument is a named parameter
3262 (otherwise it is an extra parameter matching an ellipsis). */
3265 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3266 tree type, int named)
3270 /* Varargs vectors are treated the same as long long.
3271 named_count avoids having to change the way ARM handles 'named' arguments. */
3272 if (TARGET_IWMMXT_ABI
3273 && arm_vector_mode_supported_p (mode)
3274 && pcum->named_count > pcum->nargs + 1)
3276 if (pcum->iwmmxt_nregs <= 9)
3277 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
3280 pcum->can_split = false;
3285 /* Put doubleword aligned quantities in even register pairs. */
3287 && ARM_DOUBLEWORD_ALIGN
3288 && arm_needs_doubleword_align (mode, type))
3291 if (mode == VOIDmode)
3292 /* Pick an arbitrary value for operand 2 of the call insn. */
3293 return const0_rtx;
3295 /* Only allow splitting an arg between regs and memory if all preceding
3296 args were allocated to regs. For args passed by reference we only count
3297 the reference pointer. */
3298 if (pcum->can_split)
3301 nregs = ARM_NUM_REGS2 (mode, type);
3303 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
3304 return NULL_RTX;
3306 return gen_rtx_REG (mode, pcum->nregs);
3310 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3311 tree type, bool named ATTRIBUTE_UNUSED)
3313 int nregs = pcum->nregs;
3315 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3318 if (NUM_ARG_REGS > nregs
3319 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3321 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3326 /* Variable sized types are passed by reference. This is a GCC
3327 extension to the ARM ABI. */
3330 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3331 enum machine_mode mode ATTRIBUTE_UNUSED,
3332 const_tree type, bool named ATTRIBUTE_UNUSED)
3334 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3337 /* Encode the current state of the #pragma [no_]long_calls. */
3340 OFF, /* No #pragma [no_]long_calls is in effect. */
3341 LONG, /* #pragma long_calls is in effect. */
3342 SHORT /* #pragma no_long_calls is in effect. */
3345 static arm_pragma_enum arm_pragma_long_calls = OFF;
3348 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3350 arm_pragma_long_calls = LONG;
3354 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3356 arm_pragma_long_calls = SHORT;
3360 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3362 arm_pragma_long_calls = OFF;
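/* Example of the pragmas handled above (hypothetical user code):  */
#if 0
#pragma long_calls
void far_away (void);          /* implicitly long_call */
#pragma long_calls_off
void nearby (void);            /* back to the default */
#endif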
3365 /* Table of machine attributes. */
3366 const struct attribute_spec arm_attribute_table[] =
3368 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3369 /* Function calls made to this symbol must be done indirectly, because
3370 it may lie outside the 26-bit addressing range of a normal function
3371 call. */
3372 { "long_call", 0, 0, false, true, true, NULL },
3373 /* Whereas these functions are always known to reside within the 26-bit
3374 addressing range. */
3375 { "short_call", 0, 0, false, true, true, NULL },
3376 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3377 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3378 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3379 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3380 #ifdef ARM_PE
3381 /* ARM/PE has three new attributes:
3383 dllexport - for exporting a function/variable that will live in a dll
3384 dllimport - for importing a function/variable from a dll
3386 Microsoft allows multiple declspecs in one __declspec, separating
3387 them with spaces. We do NOT support this. Instead, use __declspec
3388 multiple times. */
3390 { "dllimport", 0, 0, true, false, false, NULL },
3391 { "dllexport", 0, 0, true, false, false, NULL },
3392 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3393 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3394 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3395 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3396 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3397 #endif
3398 { NULL, 0, 0, false, false, false, NULL }
3399 };
3401 /* Handle an attribute requiring a FUNCTION_DECL;
3402 arguments as in struct attribute_spec.handler. */
3404 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3405 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3407 if (TREE_CODE (*node) != FUNCTION_DECL)
3409 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3411 *no_add_attrs = true;
3417 /* Handle an "interrupt" or "isr" attribute;
3418 arguments as in struct attribute_spec.handler. */
3420 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3425 if (TREE_CODE (*node) != FUNCTION_DECL)
3427 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3429 *no_add_attrs = true;
3431 /* FIXME: the argument if any is checked for type attributes;
3432 should it be checked for decl ones? */
3436 if (TREE_CODE (*node) == FUNCTION_TYPE
3437 || TREE_CODE (*node) == METHOD_TYPE)
3439 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3441 warning (OPT_Wattributes, "%qE attribute ignored",
3443 *no_add_attrs = true;
3446 else if (TREE_CODE (*node) == POINTER_TYPE
3447 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3448 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3449 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3451 *node = build_variant_type_copy (*node);
3452 TREE_TYPE (*node) = build_type_attribute_variant
3454 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3455 *no_add_attrs = true;
3459 /* Possibly pass this attribute on from the type to a decl. */
3460 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3461 | (int) ATTR_FLAG_FUNCTION_NEXT
3462 | (int) ATTR_FLAG_ARRAY_NEXT))
3464 *no_add_attrs = true;
3465 return tree_cons (name, args, NULL_TREE);
3469 warning (OPT_Wattributes, "%qE attribute ignored",
3478 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3479 /* Handle the "notshared" attribute. This attribute is another way of
3480 requesting hidden visibility. ARM's compiler supports
3481 "__declspec(notshared)"; we support the same thing via an
3485 arm_handle_notshared_attribute (tree *node,
3486 tree name ATTRIBUTE_UNUSED,
3487 tree args ATTRIBUTE_UNUSED,
3488 int flags ATTRIBUTE_UNUSED,
3491 tree decl = TYPE_NAME (*node);
3495 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3496 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3497 *no_add_attrs = false;
3503 /* Return 0 if the attributes for two types are incompatible, 1 if they
3504 are compatible, and 2 if they are nearly compatible (which causes a
3505 warning to be generated). */
3507 arm_comp_type_attributes (const_tree type1, const_tree type2)
3511 /* Check for mismatch of non-default calling convention. */
3512 if (TREE_CODE (type1) != FUNCTION_TYPE)
3515 /* Check for mismatched call attributes. */
3516 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3517 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3518 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3519 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3521 /* Only bother to check if an attribute is defined. */
3522 if (l1 | l2 | s1 | s2)
3524 /* If one type has an attribute, the other must have the same attribute. */
3525 if ((l1 != l2) || (s1 != s2))
3528 /* Disallow mixed attributes. */
3529 if ((l1 & s2) || (l2 & s1))
3533 /* Check for mismatched ISR attribute. */
3534 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3536 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3537 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3539 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3546 /* Assign default attributes to a newly defined type. This is used to
3547 set short_call/long_call attributes for function types of
3548 functions defined inside corresponding #pragma scopes. */
3550 arm_set_default_type_attributes (tree type)
3552 /* Add __attribute__ ((long_call)) to all functions when
3553 inside #pragma long_calls, or __attribute__ ((short_call))
3554 when inside #pragma no_long_calls. */
3555 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3557 tree type_attr_list, attr_name;
3558 type_attr_list = TYPE_ATTRIBUTES (type);
3560 if (arm_pragma_long_calls == LONG)
3561 attr_name = get_identifier ("long_call");
3562 else if (arm_pragma_long_calls == SHORT)
3563 attr_name = get_identifier ("short_call");
3567 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3568 TYPE_ATTRIBUTES (type) = type_attr_list;
3572 /* Return true if DECL is known to be linked into section SECTION. */
3575 arm_function_in_section_p (tree decl, section *section)
3577 /* We can only be certain about functions defined in the same
3578 compilation unit. */
3579 if (!TREE_STATIC (decl))
3582 /* Make sure that SYMBOL always binds to the definition in this
3583 compilation unit. */
3584 if (!targetm.binds_local_p (decl))
3587 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3588 if (!DECL_SECTION_NAME (decl))
3590 /* Make sure that we will not create a unique section for DECL. */
3591 if (flag_function_sections || DECL_ONE_ONLY (decl))
3595 return function_section (decl) == section;
3598 /* Return nonzero if a 32-bit "long_call" should be generated for
3599 a call from the current function to DECL. We generate a long_call
3600 if the function:
3602 a. has an __attribute__ ((long_call))
3603 or b. is within the scope of a #pragma long_calls
3604 or c. the -mlong-calls command line switch has been specified
3606 However we do not generate a long call if the function:
3608 d. has an __attribute__ ((short_call))
3609 or e. is inside the scope of a #pragma no_long_calls
3610 or f. is defined in the same section as the current function. */
3613 arm_is_long_call_p (tree decl)
3617 if (!decl)
3618 return TARGET_LONG_CALLS;
3620 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3621 if (lookup_attribute ("short_call", attrs))
3624 /* For "f", be conservative, and only cater for cases in which the
3625 whole of the current function is placed in the same section. */
3626 if (!flag_reorder_blocks_and_partition
3627 && TREE_CODE (decl) == FUNCTION_DECL
3628 && arm_function_in_section_p (decl, current_function_section ()))
3631 if (lookup_attribute ("long_call", attrs))
3634 return TARGET_LONG_CALLS;
3637 /* Return nonzero if it is ok to make a tail-call to DECL. */
3639 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3641 unsigned long func_type;
3643 if (cfun->machine->sibcall_blocked)
3646 /* Never tailcall something for which we have no decl, or if we
3647 are in Thumb mode. */
3648 if (decl == NULL || TARGET_THUMB)
3651 /* The PIC register is live on entry to VxWorks PLT entries, so we
3652 must make the call before restoring the PIC register. */
3653 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3656 /* Cannot tail-call to long calls, since these are out of range of
3657 a branch instruction. */
3658 if (arm_is_long_call_p (decl))
3661 /* If we are interworking and the function is not declared static
3662 then we can't tail-call it unless we know that it exists in this
3663 compilation unit (since it might be a Thumb routine). */
3664 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3667 func_type = arm_current_func_type ();
3668 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3669 if (IS_INTERRUPT (func_type))
3672 /* Never tailcall if function may be called with a misaligned SP. */
3673 if (IS_STACKALIGN (func_type))
3676 /* Everything else is ok. */
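/* Illustration (hypothetical): a call in tail position that the tests
   above may allow to become a direct branch, reusing the caller's
   frame instead of building a new one.  */
#if 0
extern int example_helper (int);
int example_wrapper (int x) { return example_helper (x + 1); }
#endif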
3681 /* Addressing mode support functions. */
3683 /* Return nonzero if X is a legitimate immediate operand when compiling
3684 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3686 legitimate_pic_operand_p (rtx x)
3688 if (GET_CODE (x) == SYMBOL_REF
3689 || (GET_CODE (x) == CONST
3690 && GET_CODE (XEXP (x, 0)) == PLUS
3691 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3697 /* Record that the current function needs a PIC register. Initialize
3698 cfun->machine->pic_reg if we have not already done so. */
3701 require_pic_register (void)
3703 /* A lot of the logic here is made obscure by the fact that this
3704 routine gets called as part of the rtx cost estimation process.
3705 We don't want those calls to affect any assumptions about the real
3706 function; and further, we can't call entry_of_function() until we
3707 start the real expansion process. */
3708 if (!crtl->uses_pic_offset_table)
3710 gcc_assert (can_create_pseudo_p ());
3711 if (arm_pic_register != INVALID_REGNUM)
3713 if (!cfun->machine->pic_reg)
3714 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3716 /* Play games to avoid marking the function as needing pic
3717 if we are being called as part of the cost-estimation
3718 process. */
3719 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3720 crtl->uses_pic_offset_table = 1;
3726 if (!cfun->machine->pic_reg)
3727 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3729 /* Play games to avoid marking the function as needing pic
3730 if we are being called as part of the cost-estimation
3731 process. */
3732 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3734 crtl->uses_pic_offset_table = 1;
3737 arm_load_pic_register (0UL);
3741 /* We can be called during expansion of PHI nodes, where
3742 we can't yet emit instructions directly in the final
3743 insn stream. Queue the insns on the entry edge, they will
3744 be committed after everything else is expanded. */
3745 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
3752 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3754 if (GET_CODE (orig) == SYMBOL_REF
3755 || GET_CODE (orig) == LABEL_REF)
3757 rtx pic_ref, address;
3761 /* If this function doesn't have a pic register, create one now. */
3762 require_pic_register ();
3766 gcc_assert (can_create_pseudo_p ());
3767 reg = gen_reg_rtx (Pmode);
3773 address = gen_reg_rtx (Pmode);
3778 emit_insn (gen_pic_load_addr_arm (address, orig));
3779 else if (TARGET_THUMB2)
3780 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3781 else /* TARGET_THUMB1 */
3782 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3784 /* VxWorks does not impose a fixed gap between segments; the run-time
3785 gap can be different from the object-file gap. We therefore can't
3786 use GOTOFF unless we are absolutely sure that the symbol is in the
3787 same segment as the GOT. Unfortunately, the flexibility of linker
3788 scripts means that we can't be sure of that in general, so assume
3789 that GOTOFF is never valid on VxWorks. */
3790 if ((GET_CODE (orig) == LABEL_REF
3791 || (GET_CODE (orig) == SYMBOL_REF &&
3792 SYMBOL_REF_LOCAL_P (orig)))
3794 && !TARGET_VXWORKS_RTP)
3795 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3798 pic_ref = gen_const_mem (Pmode,
3799 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3803 insn = emit_move_insn (reg, pic_ref);
3805 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3806 by loop. */
3807 set_unique_reg_note (insn, REG_EQUAL, orig);
3811 else if (GET_CODE (orig) == CONST)
3815 if (GET_CODE (XEXP (orig, 0)) == PLUS
3816 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3819 /* Handle the case where we have: const (UNSPEC_TLS). */
3820 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3821 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3824 /* Handle the case where we have:
3825 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
3827 if (GET_CODE (XEXP (orig, 0)) == PLUS
3828 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
3829 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
3831 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
3837 gcc_assert (can_create_pseudo_p ());
3838 reg = gen_reg_rtx (Pmode);
3841 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3843 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3844 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3845 base == reg ? 0 : reg);
3847 if (GET_CODE (offset) == CONST_INT)
3849 /* The base register doesn't really matter, we only want to
3850 test the index for the appropriate mode. */
3851 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3853 gcc_assert (can_create_pseudo_p ());
3854 offset = force_reg (Pmode, offset);
3857 if (GET_CODE (offset) == CONST_INT)
3858 return plus_constant (base, INTVAL (offset));
3861 if (GET_MODE_SIZE (mode) > 4
3862 && (GET_MODE_CLASS (mode) == MODE_INT
3863 || TARGET_SOFT_FLOAT))
3865 emit_insn (gen_addsi3 (reg, base, offset));
3869 return gen_rtx_PLUS (Pmode, base, offset);
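/* For illustration, a -fpic load of a global typically expands via
   the path above to something like (register choices illustrative):

     ldr r3, .LGOTOFF        @ offset of the symbol's GOT slot
     ldr r0, [r9, r3]        @ fetch the address from the GOT
     ldr r0, [r0]            @ fetch the value itself

   whereas a local symbol can use the cheaper GOTOFF form, a plain
   add to the PIC register with no GOT load.  */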
3876 /* Find a spare register to use during the prolog of a function. */
3879 thumb_find_work_register (unsigned long pushed_regs_mask)
3883 /* Check the argument registers first as these are call-used. The
3884 register allocation order means that sometimes r3 might be used
3885 but earlier argument registers might not, so check them all. */
3886 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3887 if (!df_regs_ever_live_p (reg))
3890 /* Before going on to check the call-saved registers we can try a couple
3891 more ways of deducing that r3 is available. The first is when we are
3892 pushing anonymous arguments onto the stack and we have fewer than 4
3893 registers worth of fixed arguments(*). In this case r3 will be part of
3894 the variable argument list and so we can be sure that it will be
3895 pushed right at the start of the function. Hence it will be available
3896 for the rest of the prologue.
3897 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
3898 if (cfun->machine->uses_anonymous_args
3899 && crtl->args.pretend_args_size > 0)
3900 return LAST_ARG_REGNUM;
3902 /* The other case is when we have fixed arguments but fewer than 4 registers
3903 worth. In this case r3 might be used in the body of the function, but
3904 it is not being used to convey an argument into the function. In theory
3905 we could just check crtl->args.size to see how many bytes are
3906 being passed in argument registers, but it seems that it is unreliable.
3907 Sometimes it will have the value 0 when in fact arguments are being
3908 passed. (See testcase execute/20021111-1.c for an example). So we also
3909 check the args_info.nregs field as well. The problem with this field is
3910 that it makes no allowances for arguments that are passed to the
3911 function but which are not used. Hence we could miss an opportunity
3912 when a function has an unused argument in r3. But it is better to be
3913 safe than sorry. */
3914 if (! cfun->machine->uses_anonymous_args
3915 && crtl->args.size >= 0
3916 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3917 && crtl->args.info.nregs < 4)
3918 return LAST_ARG_REGNUM;
3920 /* Otherwise look for a call-saved register that is going to be pushed. */
3921 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3922 if (pushed_regs_mask & (1 << reg))
3927 /* Thumb-2 can use high regs. */
3928 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3929 if (pushed_regs_mask & (1 << reg))
3932 /* Something went wrong - thumb_compute_save_reg_mask()
3933 should have arranged for a suitable register to be pushed. */
3937 static GTY(()) int pic_labelno;
3939 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3940 low register. */
3943 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3945 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
3947 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3950 gcc_assert (flag_pic);
3952 pic_reg = cfun->machine->pic_reg;
3953 if (TARGET_VXWORKS_RTP)
3955 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3956 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3957 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3959 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3961 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3962 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3966 /* We use an UNSPEC rather than a LABEL_REF because this label
3967 never appears in the code stream. */
3969 labelno = GEN_INT (pic_labelno++);
3970 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3971 l1 = gen_rtx_CONST (VOIDmode, l1);
3973 /* On the ARM the PC register contains 'dot + 8' at the time of the
3974 addition, on the Thumb it is 'dot + 4'. */
3975 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
3976 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
3978 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3982 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3983 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3985 else if (TARGET_THUMB2)
3987 /* Thumb-2 only allows very limited access to the PC. Calculate the
3988 address in a temporary register. */
3989 if (arm_pic_register != INVALID_REGNUM)
3991 pic_tmp = gen_rtx_REG (SImode,
3992 thumb_find_work_register (saved_regs));
3996 gcc_assert (can_create_pseudo_p ());
3997 pic_tmp = gen_reg_rtx (Pmode);
4000 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
4001 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
4002 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
4004 else /* TARGET_THUMB1 */
4006 if (arm_pic_register != INVALID_REGNUM
4007 && REGNO (pic_reg) > LAST_LO_REGNUM)
4009 /* We will have pushed the pic register, so we should always be
4010 able to find a work register. */
4011 pic_tmp = gen_rtx_REG (SImode,
4012 thumb_find_work_register (saved_regs));
4013 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
4014 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
4017 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
4018 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
4022 /* Need to emit this whether or not we obey regdecls,
4023 since setjmp/longjmp can cause life info to screw up. */
4024 emit_use (pic_offset_table_rtx);
4028 /* Return nonzero if X is valid as an ARM state addressing register. */
4030 arm_address_register_rtx_p (rtx x, int strict_p)
4034 if (GET_CODE (x) != REG)
4040 return ARM_REGNO_OK_FOR_BASE_P (regno);
4042 return (regno <= LAST_ARM_REGNUM
4043 || regno >= FIRST_PSEUDO_REGISTER
4044 || regno == FRAME_POINTER_REGNUM
4045 || regno == ARG_POINTER_REGNUM);
4048 /* Return TRUE if this rtx is the difference of a symbol and a label,
4049 and will reduce to a PC-relative relocation in the object file.
4050 Expressions like this can be left alone when generating PIC, rather
4051 than forced through the GOT. */
4053 pcrel_constant_p (rtx x)
4055 if (GET_CODE (x) == MINUS)
4056 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
4061 /* Return nonzero if X is a valid ARM state address operand. */
4063 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
4067 enum rtx_code code = GET_CODE (x);
4069 if (arm_address_register_rtx_p (x, strict_p))
4072 use_ldrd = (TARGET_LDRD
4074 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4076 if (code == POST_INC || code == PRE_DEC
4077 || ((code == PRE_INC || code == POST_DEC)
4078 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4079 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4081 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4082 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4083 && GET_CODE (XEXP (x, 1)) == PLUS
4084 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4086 rtx addend = XEXP (XEXP (x, 1), 1);
4088 /* Don't allow ldrd post-increment by register, because it's hard
4089 to fix up invalid register choices. */
4091 && GET_CODE (x) == POST_MODIFY
4092 && GET_CODE (addend) == REG)
4095 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
4096 && arm_legitimate_index_p (mode, addend, outer, strict_p));
4099 /* After reload constants split into minipools will have addresses
4100 from a LABEL_REF. */
4101 else if (reload_completed
4102 && (code == LABEL_REF
4104 && GET_CODE (XEXP (x, 0)) == PLUS
4105 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4106 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4109 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4112 else if (code == PLUS)
4114 rtx xop0 = XEXP (x, 0);
4115 rtx xop1 = XEXP (x, 1);
4117 return ((arm_address_register_rtx_p (xop0, strict_p)
4118 && GET_CODE(xop1) == CONST_INT
4119 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
4120 || (arm_address_register_rtx_p (xop1, strict_p)
4121 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
4125 /* Reload currently can't handle MINUS, so disable this for now */
4126 else if (GET_CODE (x) == MINUS)
4128 rtx xop0 = XEXP (x, 0);
4129 rtx xop1 = XEXP (x, 1);
4131 return (arm_address_register_rtx_p (xop0, strict_p)
4132 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
4136 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4137 && code == SYMBOL_REF
4138 && CONSTANT_POOL_ADDRESS_P (x)
4140 && symbol_mentioned_p (get_pool_constant (x))
4141 && ! pcrel_constant_p (get_pool_constant (x))))
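/* For reference, ARM-state addressing forms accepted above
   (illustrative assembly):

     ldr r0, [r1]              @ base register
     ldr r0, [r1, #4]          @ base + immediate index
     ldr r0, [r1, r2, lsl #2]  @ base + scaled register index
     ldr r0, [r1], #4          @ post-increment
     ldr r0, [r1, #-4]!        @ pre-modify with writeback  */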
4147 /* Return nonzero if X is a valid Thumb-2 address operand. */
4149 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4152 enum rtx_code code = GET_CODE (x);
4154 if (arm_address_register_rtx_p (x, strict_p))
4157 use_ldrd = (TARGET_LDRD
4159 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4161 if (code == POST_INC || code == PRE_DEC
4162 || ((code == PRE_INC || code == POST_DEC)
4163 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4164 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4166 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4167 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4168 && GET_CODE (XEXP (x, 1)) == PLUS
4169 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4171 /* Thumb-2 only has autoincrement by constant. */
4172 rtx addend = XEXP (XEXP (x, 1), 1);
4173 HOST_WIDE_INT offset;
4175 if (GET_CODE (addend) != CONST_INT)
4178 offset = INTVAL (addend);
4179 if (GET_MODE_SIZE (mode) <= 4)
4180 return (offset > -256 && offset < 256);
4182 return (use_ldrd && offset > -1024 && offset < 1024
4183 && (offset & 3) == 0);
4186 /* After reload constants split into minipools will have addresses
4187 from a LABEL_REF. */
4188 else if (reload_completed
4189 && (code == LABEL_REF
4191 && GET_CODE (XEXP (x, 0)) == PLUS
4192 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4193 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4196 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4199 else if (code == PLUS)
4201 rtx xop0 = XEXP (x, 0);
4202 rtx xop1 = XEXP (x, 1);
4204 return ((arm_address_register_rtx_p (xop0, strict_p)
4205 && thumb2_legitimate_index_p (mode, xop1, strict_p))
4206 || (arm_address_register_rtx_p (xop1, strict_p)
4207 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
4210 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4211 && code == SYMBOL_REF
4212 && CONSTANT_POOL_ADDRESS_P (x)
4214 && symbol_mentioned_p (get_pool_constant (x))
4215 && ! pcrel_constant_p (get_pool_constant (x))))
4221 /* Return nonzero if INDEX is valid for an address index operand in
4224 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
4227 HOST_WIDE_INT range;
4228 enum rtx_code code = GET_CODE (index);
4230 /* Standard coprocessor addressing modes. */
4231 if (TARGET_HARD_FLOAT
4232 && (TARGET_FPA || TARGET_MAVERICK)
4233 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4234 || (TARGET_MAVERICK && mode == DImode)))
4235 return (code == CONST_INT && INTVAL (index) < 1024
4236 && INTVAL (index) > -1024
4237 && (INTVAL (index) & 3) == 0);
4240 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4241 return (code == CONST_INT
4242 && INTVAL (index) < 1016
4243 && INTVAL (index) > -1024
4244 && (INTVAL (index) & 3) == 0);
4246 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4247 return (code == CONST_INT
4248 && INTVAL (index) < 1024
4249 && INTVAL (index) > -1024
4250 && (INTVAL (index) & 3) == 0);
4252 if (arm_address_register_rtx_p (index, strict_p)
4253 && (GET_MODE_SIZE (mode) <= 4))
4256 if (mode == DImode || mode == DFmode)
4258 if (code == CONST_INT)
4260 HOST_WIDE_INT val = INTVAL (index);
4263 return val > -256 && val < 256;
4265 return val > -4096 && val < 4092;
4268 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
4271 if (GET_MODE_SIZE (mode) <= 4
4275 || (mode == QImode && outer == SIGN_EXTEND))))
4279 rtx xiop0 = XEXP (index, 0);
4280 rtx xiop1 = XEXP (index, 1);
4282 return ((arm_address_register_rtx_p (xiop0, strict_p)
4283 && power_of_two_operand (xiop1, SImode))
4284 || (arm_address_register_rtx_p (xiop1, strict_p)
4285 && power_of_two_operand (xiop0, SImode)));
4287 else if (code == LSHIFTRT || code == ASHIFTRT
4288 || code == ASHIFT || code == ROTATERT)
4290 rtx op = XEXP (index, 1);
4292 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4293 && GET_CODE (op) == CONST_INT
4295 && INTVAL (op) <= 31);
4299 /* For ARM v4 we may be doing a sign-extend operation during the
4305 || (outer == SIGN_EXTEND && mode == QImode))
4311 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
4313 return (code == CONST_INT
4314 && INTVAL (index) < range
4315 && INTVAL (index) > -range);
4318 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4319 index operand, i.e. 1, 2, 4 or 8. */
4321 thumb2_index_mul_operand (rtx op)
4325 if (GET_CODE (op) != CONST_INT)
4329 return (val == 1 || val == 2 || val == 4 || val == 8);
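/* E.g. a scale of 4 corresponds to an address such as [r1, r2, lsl #2];
the four legal scales map to left shifts of 0-3. */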
4332 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4334 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4336 enum rtx_code code = GET_CODE (index);
4338 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4339 /* Standard coprocessor addressing modes. */
4340 if (TARGET_HARD_FLOAT
4341 && (TARGET_FPA || TARGET_MAVERICK)
4342 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4343 || (TARGET_MAVERICK && mode == DImode)))
4344 return (code == CONST_INT && INTVAL (index) < 1024
4345 && INTVAL (index) > -1024
4346 && (INTVAL (index) & 3) == 0);
4348 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4350 /* For DImode assume values will usually live in core regs
4351 and only allow LDRD addressing modes. */
4352 if (!TARGET_LDRD || mode != DImode)
4353 return (code == CONST_INT
4354 && INTVAL (index) < 1024
4355 && INTVAL (index) > -1024
4356 && (INTVAL (index) & 3) == 0);
4360 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4361 return (code == CONST_INT
4362 && INTVAL (index) < 1016
4363 && INTVAL (index) > -1024
4364 && (INTVAL (index) & 3) == 0);
4366 if (arm_address_register_rtx_p (index, strict_p)
4367 && (GET_MODE_SIZE (mode) <= 4))
4370 if (mode == DImode || mode == DFmode)
4372 if (code == CONST_INT)
4374 HOST_WIDE_INT val = INTVAL (index);
4375 /* ??? Can we assume ldrd for thumb2? */
4376 /* Thumb-2 ldrd only has reg+const addressing modes. */
4377 /* ldrd supports offsets of +-1020.
4378 However the ldr fallback does not. */
4379 return val > -256 && val < 256 && (val & 3) == 0;
4387 rtx xiop0 = XEXP (index, 0);
4388 rtx xiop1 = XEXP (index, 1);
4390 return ((arm_address_register_rtx_p (xiop0, strict_p)
4391 && thumb2_index_mul_operand (xiop1))
4392 || (arm_address_register_rtx_p (xiop1, strict_p)
4393 && thumb2_index_mul_operand (xiop0)));
4395 else if (code == ASHIFT)
4397 rtx op = XEXP (index, 1);
4399 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4400 && GET_CODE (op) == CONST_INT
4402 && INTVAL (op) <= 3);
4405 return (code == CONST_INT
4406 && INTVAL (index) < 4096
4407 && INTVAL (index) > -256);
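/* These bounds mirror the Thumb-2 load/store encodings: a 12-bit
positive immediate offset (0..4095) or an 8-bit negative one
(-255..0). */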
4410 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4412 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4416 if (GET_CODE (x) != REG)
4422 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4424 return (regno <= LAST_LO_REGNUM
4425 || regno > LAST_VIRTUAL_REGISTER
4426 || regno == FRAME_POINTER_REGNUM
4427 || (GET_MODE_SIZE (mode) >= 4
4428 && (regno == STACK_POINTER_REGNUM
4429 || regno >= FIRST_PSEUDO_REGISTER
4430 || x == hard_frame_pointer_rtx
4431 || x == arg_pointer_rtx)));
4434 /* Return nonzero if x is a legitimate index register. This is the case
4435 for any base register that can access a QImode object. */
4437 thumb1_index_register_rtx_p (rtx x, int strict_p)
4439 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4442 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4444 The AP may be eliminated to either the SP or the FP, so we use the
4445 least common denominator, e.g. SImode, and offsets from 0 to 64.
4447 ??? Verify whether the above is the right approach.
4449 ??? Also, the FP may be eliminated to the SP, so perhaps that
4450 needs special handling also.
4452 ??? Look at how the mips16 port solves this problem. It probably uses
4453 better ways to solve some of these problems.
4455 Although it is not incorrect, we don't accept QImode and HImode
4456 addresses based on the frame pointer or arg pointer until the
4457 reload pass starts. This is so that eliminating such addresses
4458 into stack based ones won't produce impossible code. */
4460 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4462 /* ??? Not clear if this is right. Experiment. */
4463 if (GET_MODE_SIZE (mode) < 4
4464 && !(reload_in_progress || reload_completed)
4465 && (reg_mentioned_p (frame_pointer_rtx, x)
4466 || reg_mentioned_p (arg_pointer_rtx, x)
4467 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4468 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4469 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4470 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4473 /* Accept any base register. SP only in SImode or larger. */
4474 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4477 /* This is PC relative data before arm_reorg runs. */
4478 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4479 && GET_CODE (x) == SYMBOL_REF
4480 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4483 /* This is PC relative data after arm_reorg runs. */
4484 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
4486 && (GET_CODE (x) == LABEL_REF
4487 || (GET_CODE (x) == CONST
4488 && GET_CODE (XEXP (x, 0)) == PLUS
4489 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4490 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4493 /* Post-inc indexing only supported for SImode and larger. */
4494 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4495 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4498 else if (GET_CODE (x) == PLUS)
4500 /* REG+REG address can be any two index registers. */
4501 /* We disallow FRAME+REG addressing since we know that FRAME
4502 will be replaced with STACK, and SP relative addressing only
4503 permits SP+OFFSET. */
4504 if (GET_MODE_SIZE (mode) <= 4
4505 && XEXP (x, 0) != frame_pointer_rtx
4506 && XEXP (x, 1) != frame_pointer_rtx
4507 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4508 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4511 /* REG+const has 5-7 bit offset for non-SP registers. */
4512 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4513 || XEXP (x, 0) == arg_pointer_rtx)
4514 && GET_CODE (XEXP (x, 1)) == CONST_INT
4515 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4518 /* REG+const has 10-bit offset for SP, but only SImode and
4519 larger are supported. */
4520 /* ??? Should probably check for DI/DFmode overflow here
4521 just like GO_IF_LEGITIMATE_OFFSET does. */
4522 else if (GET_CODE (XEXP (x, 0)) == REG
4523 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4524 && GET_MODE_SIZE (mode) >= 4
4525 && GET_CODE (XEXP (x, 1)) == CONST_INT
4526 && INTVAL (XEXP (x, 1)) >= 0
4527 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4528 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4531 else if (GET_CODE (XEXP (x, 0)) == REG
4532 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4533 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4534 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4535 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4536 && GET_MODE_SIZE (mode) >= 4
4537 && GET_CODE (XEXP (x, 1)) == CONST_INT
4538 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4542 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4543 && GET_MODE_SIZE (mode) == 4
4544 && GET_CODE (x) == SYMBOL_REF
4545 && CONSTANT_POOL_ADDRESS_P (x)
4547 && symbol_mentioned_p (get_pool_constant (x))
4548 && ! pcrel_constant_p (get_pool_constant (x))))
4554 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4555 instruction of mode MODE. */
4557 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4559 switch (GET_MODE_SIZE (mode))
4562 return val >= 0 && val < 32;
4565 return val >= 0 && val < 64 && (val & 1) == 0;
4569 && (val + GET_MODE_SIZE (mode)) <= 128
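/* These ranges follow from the 5-bit scaled immediate offset fields of
the 16-bit loads and stores: 0-31 for bytes, 0-62 (even) for
halfwords, and 0-124 (word-aligned) for word and larger accesses. */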
4575 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
4578 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
4579 else if (TARGET_THUMB2)
4580 return thumb2_legitimate_address_p (mode, x, strict_p);
4581 else /* if (TARGET_THUMB1) */
4582 return thumb1_legitimate_address_p (mode, x, strict_p);
4585 /* Build the SYMBOL_REF for __tls_get_addr. */
4587 static GTY(()) rtx tls_get_addr_libfunc;
4590 get_tls_get_addr (void)
4592 if (!tls_get_addr_libfunc)
4593 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4594 return tls_get_addr_libfunc;
4598 arm_load_tp (rtx target)
4601 target = gen_reg_rtx (SImode);
4605 /* Can return in any reg. */
4606 emit_insn (gen_load_tp_hard (target));
4610 /* Always returned in r0. Immediately copy the result into a pseudo,
4611 otherwise other uses of r0 (e.g. setting up function arguments) may
4612 clobber the value. */
4616 emit_insn (gen_load_tp_soft ());
4618 tmp = gen_rtx_REG (SImode, 0);
4619 emit_move_insn (target, tmp);
4625 load_tls_operand (rtx x, rtx reg)
4629 if (reg == NULL_RTX)
4630 reg = gen_reg_rtx (SImode);
4632 tmp = gen_rtx_CONST (SImode, x);
4634 emit_move_insn (reg, tmp);
4640 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4642 rtx insns, label, labelno, sum;
4646 labelno = GEN_INT (pic_labelno++);
4647 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4648 label = gen_rtx_CONST (VOIDmode, label);
4650 sum = gen_rtx_UNSPEC (Pmode,
4651 gen_rtvec (4, x, GEN_INT (reloc), label,
4652 GEN_INT (TARGET_ARM ? 8 : 4)),
4654 reg = load_tls_operand (sum, reg);
4657 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4658 else if (TARGET_THUMB2)
4661 /* Thumb-2 only allows very limited access to the PC. Calculate
4662 the address in a temporary register. */
4663 tmp = gen_reg_rtx (SImode);
4664 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4665 emit_insn (gen_addsi3 (reg, reg, tmp));
4667 else /* TARGET_THUMB1 */
4668 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4670 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4671 Pmode, 1, reg, Pmode);
4673 insns = get_insns ();
4680 legitimize_tls_address (rtx x, rtx reg)
4682 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4683 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4687 case TLS_MODEL_GLOBAL_DYNAMIC:
4688 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4689 dest = gen_reg_rtx (Pmode);
4690 emit_libcall_block (insns, dest, ret, x);
4693 case TLS_MODEL_LOCAL_DYNAMIC:
4694 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4696 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4697 share the LDM result with other LD model accesses. */
4698 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4700 dest = gen_reg_rtx (Pmode);
4701 emit_libcall_block (insns, dest, ret, eqv);
4703 /* Load the addend. */
4704 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4706 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4707 return gen_rtx_PLUS (Pmode, dest, addend);
4709 case TLS_MODEL_INITIAL_EXEC:
4710 labelno = GEN_INT (pic_labelno++);
4711 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4712 label = gen_rtx_CONST (VOIDmode, label);
4713 sum = gen_rtx_UNSPEC (Pmode,
4714 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4715 GEN_INT (TARGET_ARM ? 8 : 4)),
4717 reg = load_tls_operand (sum, reg);
4720 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4721 else if (TARGET_THUMB2)
4724 /* Thumb-2 only allows very limited access to the PC. Calculate
4725 the address in a temporary register. */
4726 tmp = gen_reg_rtx (SImode);
4727 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4728 emit_insn (gen_addsi3 (reg, reg, tmp));
4729 emit_move_insn (reg, gen_const_mem (SImode, reg));
4733 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4734 emit_move_insn (reg, gen_const_mem (SImode, reg));
4737 tp = arm_load_tp (NULL_RTX);
4739 return gen_rtx_PLUS (Pmode, tp, reg);
4741 case TLS_MODEL_LOCAL_EXEC:
4742 tp = arm_load_tp (NULL_RTX);
4744 reg = gen_rtx_UNSPEC (Pmode,
4745 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4747 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4749 return gen_rtx_PLUS (Pmode, tp, reg);
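/* In rough outline (a sketch of the sequences built above, not the
exact emitted code): the global- and local-dynamic models call
__tls_get_addr; initial-exec loads a thread-pointer offset from the
GOT and adds it to the thread pointer; local-exec adds a link-time
constant offset directly to the thread pointer. */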
4756 /* Try machine-dependent ways of modifying an illegitimate address
4757 to be legitimate. If we find one, return the new, valid address. */
4759 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4763 /* TODO: legitimize_address for Thumb2. */
4766 return thumb_legitimize_address (x, orig_x, mode);
4769 if (arm_tls_symbol_p (x))
4770 return legitimize_tls_address (x, NULL_RTX);
4772 if (GET_CODE (x) == PLUS)
4774 rtx xop0 = XEXP (x, 0);
4775 rtx xop1 = XEXP (x, 1);
4777 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4778 xop0 = force_reg (SImode, xop0);
4780 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4781 xop1 = force_reg (SImode, xop1);
4783 if (ARM_BASE_REGISTER_RTX_P (xop0)
4784 && GET_CODE (xop1) == CONST_INT)
4786 HOST_WIDE_INT n, low_n;
4790 /* VFP addressing modes actually allow greater offsets, but for
4791 now we just stick with the lowest common denominator. */
4793 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4805 low_n = ((mode) == TImode ? 0
4806 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4810 base_reg = gen_reg_rtx (SImode);
4811 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4812 emit_move_insn (base_reg, val);
4813 x = plus_constant (base_reg, low_n);
4815 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4816 x = gen_rtx_PLUS (SImode, xop0, xop1);
4819 /* XXX We don't allow MINUS any more -- see comment in
4820 arm_legitimate_address_outer_p (). */
4821 else if (GET_CODE (x) == MINUS)
4823 rtx xop0 = XEXP (x, 0);
4824 rtx xop1 = XEXP (x, 1);
4826 if (CONSTANT_P (xop0))
4827 xop0 = force_reg (SImode, xop0);
4829 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4830 xop1 = force_reg (SImode, xop1);
4832 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4833 x = gen_rtx_MINUS (SImode, xop0, xop1);
4836 /* Make sure to take full advantage of the pre-indexed addressing mode
4837 with absolute addresses which often allows for the base register to
4838 be factorized for multiple adjacent memory references, and it might
4839 even allow for the minipool to be avoided entirely. */
4840 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4843 HOST_WIDE_INT mask, base, index;
4846 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4847 use an 8-bit index. So let's use a 12-bit index for SImode only and
4848 hope that arm_gen_constant will enable ldrb to use more bits. */
4849 bits = (mode == SImode) ? 12 : 8;
4850 mask = (1 << bits) - 1;
4851 base = INTVAL (x) & ~mask;
4852 index = INTVAL (x) & mask;
4853 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4855 /* It'll most probably be more efficient to generate the base
4856 with more bits set and use a negative index instead. */
4860 base_reg = force_reg (SImode, GEN_INT (base));
4861 x = plus_constant (base_reg, index);
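/* A worked example with illustrative numbers: for SImode, bits = 12 and
mask = 0xfff, so the address 0x00ff1004 splits into base = 0x00ff1000
and index = 4; the base is materialized once and each nearby reference
becomes a cheap access such as ldr rD, [rB, #4]. */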
4866 /* We need to find and carefully transform any SYMBOL and LABEL
4867 references; so go back to the original address expression. */
4868 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4870 if (new_x != orig_x)
4878 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4879 to be legitimate. If we find one, return the new, valid address. */
4881 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4883 if (arm_tls_symbol_p (x))
4884 return legitimize_tls_address (x, NULL_RTX);
4886 if (GET_CODE (x) == PLUS
4887 && GET_CODE (XEXP (x, 1)) == CONST_INT
4888 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4889 || INTVAL (XEXP (x, 1)) < 0))
4891 rtx xop0 = XEXP (x, 0);
4892 rtx xop1 = XEXP (x, 1);
4893 HOST_WIDE_INT offset = INTVAL (xop1);
4895 /* Try and fold the offset into a biasing of the base register and
4896 then offsetting that. Don't do this when optimizing for space
4897 since it can cause too many CSEs. */
4898 if (optimize_size && offset >= 0
4899 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4901 HOST_WIDE_INT delta;
4904 delta = offset - (256 - GET_MODE_SIZE (mode));
4905 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4906 delta = 31 * GET_MODE_SIZE (mode);
4908 delta = offset & (~31 * GET_MODE_SIZE (mode));
4910 xop0 = force_operand (plus_constant (xop0, offset - delta),
4912 x = plus_constant (xop0, delta);
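/* A hypothetical trace: for SImode (usable offsets 0-124) and
offset == 300, delta = 300 - (256 - 4) = 48, so the base is biased by
252 (a single Thumb add) and the access itself uses [rT, #48]. */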
4914 else if (offset < 0 && offset > -256)
4915 /* Small negative offsets are best done with a subtract before the
4916 dereference, forcing these into a register normally takes two
4917 instructions. */
4918 x = force_operand (x, NULL_RTX);
4921 /* For the remaining cases, force the constant into a register. */
4922 xop1 = force_reg (SImode, xop1);
4923 x = gen_rtx_PLUS (SImode, xop0, xop1);
4926 else if (GET_CODE (x) == PLUS
4927 && s_register_operand (XEXP (x, 1), SImode)
4928 && !s_register_operand (XEXP (x, 0), SImode))
4930 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4932 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4937 /* We need to find and carefully transform any SYMBOL and LABEL
4938 references; so go back to the original address expression. */
4939 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4941 if (new_x != orig_x)
4949 thumb_legitimize_reload_address (rtx *x_p,
4950 enum machine_mode mode,
4951 int opnum, int type,
4952 int ind_levels ATTRIBUTE_UNUSED)
4956 if (GET_CODE (x) == PLUS
4957 && GET_MODE_SIZE (mode) < 4
4958 && REG_P (XEXP (x, 0))
4959 && XEXP (x, 0) == stack_pointer_rtx
4960 && GET_CODE (XEXP (x, 1)) == CONST_INT
4961 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4966 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4967 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4971 /* If both registers are hi-regs, then it's better to reload the
4972 entire expression rather than each register individually. That
4973 only requires one reload register rather than two. */
4974 if (GET_CODE (x) == PLUS
4975 && REG_P (XEXP (x, 0))
4976 && REG_P (XEXP (x, 1))
4977 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4978 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4983 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4984 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4991 /* Test for various thread-local symbols. */
4993 /* Return TRUE if X is a thread-local symbol. */
4996 arm_tls_symbol_p (rtx x)
4998 if (! TARGET_HAVE_TLS)
5001 if (GET_CODE (x) != SYMBOL_REF)
5004 return SYMBOL_REF_TLS_MODEL (x) != 0;
5007 /* Helper for arm_tls_referenced_p. */
5010 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
5012 if (GET_CODE (*x) == SYMBOL_REF)
5013 return SYMBOL_REF_TLS_MODEL (*x) != 0;
5015 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5016 TLS offsets, not real symbol references. */
5017 if (GET_CODE (*x) == UNSPEC
5018 && XINT (*x, 1) == UNSPEC_TLS)
5024 /* Return TRUE if X contains any TLS symbol references. */
5027 arm_tls_referenced_p (rtx x)
5029 if (! TARGET_HAVE_TLS)
5032 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
5035 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
5038 arm_cannot_force_const_mem (rtx x)
5042 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
5044 split_const (x, &base, &offset);
5045 if (GET_CODE (base) == SYMBOL_REF
5046 && !offset_within_block_p (base, INTVAL (offset)))
5049 return arm_tls_referenced_p (x);
5052 #define REG_OR_SUBREG_REG(X) \
5053 (GET_CODE (X) == REG \
5054 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
5056 #define REG_OR_SUBREG_RTX(X) \
5057 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
5059 #ifndef COSTS_N_INSNS
5060 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
5063 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
5065 enum machine_mode mode = GET_MODE (x);
5078 return COSTS_N_INSNS (1);
5081 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5084 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
5091 return COSTS_N_INSNS (2) + cycles;
5093 return COSTS_N_INSNS (1) + 16;
5096 return (COSTS_N_INSNS (1)
5097 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
5098 + (GET_CODE (SET_DEST (x)) == MEM)));
5103 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
5105 if (thumb_shiftable_const (INTVAL (x)))
5106 return COSTS_N_INSNS (2);
5107 return COSTS_N_INSNS (3);
5109 else if ((outer == PLUS || outer == COMPARE)
5110 && INTVAL (x) < 256 && INTVAL (x) > -256)
5112 else if (outer == AND
5113 && INTVAL (x) < 256 && INTVAL (x) >= -256)
5114 return COSTS_N_INSNS (1);
5115 else if (outer == ASHIFT || outer == ASHIFTRT
5116 || outer == LSHIFTRT)
5118 return COSTS_N_INSNS (2);
5124 return COSTS_N_INSNS (3);
5142 /* XXX another guess. */
5143 /* Memory costs quite a lot for the first word, but subsequent words
5144 load at the equivalent of a single insn each. */
5145 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
5146 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5151 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5156 /* XXX still guessing. */
5157 switch (GET_MODE (XEXP (x, 0)))
5160 return (1 + (mode == DImode ? 4 : 0)
5161 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5164 return (4 + (mode == DImode ? 4 : 0)
5165 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5168 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5180 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
5182 enum machine_mode mode = GET_MODE (x);
5183 enum rtx_code subcode;
5185 enum rtx_code code = GET_CODE (x);
5192 /* Memory costs quite a lot for the first word, but subsequent words
5193 load at the equivalent of a single insn each. */
5194 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
5201 if (TARGET_HARD_FLOAT && mode == SFmode)
5202 *total = COSTS_N_INSNS (2);
5203 else if (TARGET_HARD_FLOAT && mode == DFmode)
5204 *total = COSTS_N_INSNS (4);
5206 *total = COSTS_N_INSNS (20);
5210 if (GET_CODE (XEXP (x, 1)) == REG)
5211 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
5212 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5213 *total = rtx_cost (XEXP (x, 1), code, speed);
5219 *total += COSTS_N_INSNS (4);
5224 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
5225 *total += rtx_cost (XEXP (x, 0), code, speed);
5228 *total += COSTS_N_INSNS (3);
5232 *total += COSTS_N_INSNS (1);
5233 /* Increase the cost of complex shifts because they aren't any faster,
5234 and reduce dual issue opportunities. */
5235 if (arm_tune_cortex_a9
5236 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
5244 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5246 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5247 *total = COSTS_N_INSNS (1);
5249 *total = COSTS_N_INSNS (20);
5252 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5253 /* Thumb2 does not have RSB, so all arguments must be
5254 registers (subtracting a constant is canonicalized as
5255 addition of the negated constant). */
5261 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5262 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5263 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5265 *total += rtx_cost (XEXP (x, 1), code, speed);
5269 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5270 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
5272 *total += rtx_cost (XEXP (x, 0), code, speed);
5279 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5281 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5283 *total = COSTS_N_INSNS (1);
5284 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5285 && arm_const_double_rtx (XEXP (x, 0)))
5287 *total += rtx_cost (XEXP (x, 1), code, speed);
5291 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5292 && arm_const_double_rtx (XEXP (x, 1)))
5294 *total += rtx_cost (XEXP (x, 0), code, speed);
5300 *total = COSTS_N_INSNS (20);
5304 *total = COSTS_N_INSNS (1);
5305 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5306 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5308 *total += rtx_cost (XEXP (x, 1), code, speed);
5312 subcode = GET_CODE (XEXP (x, 1));
5313 if (subcode == ASHIFT || subcode == ASHIFTRT
5314 || subcode == LSHIFTRT
5315 || subcode == ROTATE || subcode == ROTATERT)
5317 *total += rtx_cost (XEXP (x, 0), code, speed);
5318 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5322 /* A shift as a part of RSB costs no more than RSB itself. */
5323 if (GET_CODE (XEXP (x, 0)) == MULT
5324 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5326 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
5327 *total += rtx_cost (XEXP (x, 1), code, speed);
5332 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
5334 *total += rtx_cost (XEXP (x, 0), code, speed);
5335 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5339 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
5340 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
5342 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5343 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
5344 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
5345 *total += COSTS_N_INSNS (1);
5353 if (code == PLUS && arm_arch6 && mode == SImode
5354 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5355 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5357 *total = COSTS_N_INSNS (1);
5358 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
5360 *total += rtx_cost (XEXP (x, 1), code, speed);
5364 /* MLA: All arguments must be registers. We filter out
5365 multiplication by a power of two, so that we fall down into
5367 if (GET_CODE (XEXP (x, 0)) == MULT
5368 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5370 /* The cost comes from the cost of the multiply. */
5374 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5376 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5378 *total = COSTS_N_INSNS (1);
5379 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5380 && arm_const_double_rtx (XEXP (x, 1)))
5382 *total += rtx_cost (XEXP (x, 0), code, speed);
5389 *total = COSTS_N_INSNS (20);
5393 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
5394 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
5396 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
5397 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5398 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
5399 *total += COSTS_N_INSNS (1);
5405 case AND: case XOR: case IOR:
5408 /* Normally the frame registers will be split into reg+const during
5409 reload, so it is a bad idea to combine them with other instructions,
5410 since then they might not be moved outside of loops. As a compromise
5411 we allow integration with ops that have a constant as their second
5413 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
5414 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
5415 && GET_CODE (XEXP (x, 1)) != CONST_INT)
5416 || (REG_OR_SUBREG_REG (XEXP (x, 0))
5417 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
5422 *total += COSTS_N_INSNS (2);
5423 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5424 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5426 *total += rtx_cost (XEXP (x, 0), code, speed);
5433 *total += COSTS_N_INSNS (1);
5434 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5435 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5437 *total += rtx_cost (XEXP (x, 0), code, speed);
5440 subcode = GET_CODE (XEXP (x, 0));
5441 if (subcode == ASHIFT || subcode == ASHIFTRT
5442 || subcode == LSHIFTRT
5443 || subcode == ROTATE || subcode == ROTATERT)
5445 *total += rtx_cost (XEXP (x, 1), code, speed);
5446 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5451 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5453 *total += rtx_cost (XEXP (x, 1), code, speed);
5454 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5458 if (subcode == UMIN || subcode == UMAX
5459 || subcode == SMIN || subcode == SMAX)
5461 *total = COSTS_N_INSNS (3);
5468 /* This should have been handled by the CPU specific routines. */
5472 if (arm_arch3m && mode == SImode
5473 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5474 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5475 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5476 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5477 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5478 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5480 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
5483 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
5487 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5489 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5491 *total = COSTS_N_INSNS (1);
5494 *total = COSTS_N_INSNS (2);
5500 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
5501 if (mode == SImode && code == NOT)
5503 subcode = GET_CODE (XEXP (x, 0));
5504 if (subcode == ASHIFT || subcode == ASHIFTRT
5505 || subcode == LSHIFTRT
5506 || subcode == ROTATE || subcode == ROTATERT
5508 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
5510 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5511 /* Register shifts cost an extra cycle. */
5512 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
5513 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
5522 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5524 *total = COSTS_N_INSNS (4);
5528 operand = XEXP (x, 0);
5530 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
5531 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
5532 && GET_CODE (XEXP (operand, 0)) == REG
5533 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
5534 *total += COSTS_N_INSNS (1);
5535 *total += (rtx_cost (XEXP (x, 1), code, speed)
5536 + rtx_cost (XEXP (x, 2), code, speed));
5540 if (mode == SImode && XEXP (x, 1) == const0_rtx)
5542 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5548 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5549 && mode == SImode && XEXP (x, 1) == const0_rtx)
5551 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5557 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5558 && mode == SImode && XEXP (x, 1) == const0_rtx)
5560 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5580 /* SCC insns. If the comparison has already been performed, they
5581 cost 2 instructions. Otherwise they need an additional comparison
5582 before them. */
5583 *total = COSTS_N_INSNS (2);
5584 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5591 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5597 *total += COSTS_N_INSNS (1);
5598 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5599 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5601 *total += rtx_cost (XEXP (x, 0), code, speed);
5605 subcode = GET_CODE (XEXP (x, 0));
5606 if (subcode == ASHIFT || subcode == ASHIFTRT
5607 || subcode == LSHIFTRT
5608 || subcode == ROTATE || subcode == ROTATERT)
5610 *total += rtx_cost (XEXP (x, 1), code, speed);
5611 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5616 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5618 *total += rtx_cost (XEXP (x, 1), code, speed);
5619 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5629 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5630 if (GET_CODE (XEXP (x, 1)) != CONST_INT
5631 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
5632 *total += rtx_cost (XEXP (x, 1), code, speed);
5636 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5638 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5640 *total = COSTS_N_INSNS (1);
5643 *total = COSTS_N_INSNS (20);
5646 *total = COSTS_N_INSNS (1);
5648 *total += COSTS_N_INSNS (3);
5652 if (GET_MODE_CLASS (mode) == MODE_INT)
5656 *total += COSTS_N_INSNS (1);
5658 if (GET_MODE (XEXP (x, 0)) != SImode)
5662 if (GET_CODE (XEXP (x, 0)) != MEM)
5663 *total += COSTS_N_INSNS (1);
5665 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5666 *total += COSTS_N_INSNS (2);
5675 if (GET_MODE_CLASS (mode) == MODE_INT)
5678 *total += COSTS_N_INSNS (1);
5680 if (GET_MODE (XEXP (x, 0)) != SImode)
5684 if (GET_CODE (XEXP (x, 0)) != MEM)
5685 *total += COSTS_N_INSNS (1);
5687 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5688 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
5695 switch (GET_MODE (XEXP (x, 0)))
5702 *total = COSTS_N_INSNS (1);
5712 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5716 if (const_ok_for_arm (INTVAL (x))
5717 || const_ok_for_arm (~INTVAL (x)))
5718 *total = COSTS_N_INSNS (1);
5720 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
5721 INTVAL (x), NULL_RTX,
5728 *total = COSTS_N_INSNS (3);
5732 *total = COSTS_N_INSNS (1);
5736 *total = COSTS_N_INSNS (1);
5737 *total += rtx_cost (XEXP (x, 0), code, speed);
5741 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
5742 *total = COSTS_N_INSNS (1);
5744 *total = COSTS_N_INSNS (4);
5748 *total = COSTS_N_INSNS (4);
5753 /* RTX costs when optimizing for size. */
5755 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5758 enum machine_mode mode = GET_MODE (x);
5761 /* XXX TBD. For now, use the standard costs. */
5762 *total = thumb1_rtx_costs (x, code, outer_code);
5766 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5770 /* A memory access costs 1 insn if the mode is small or the address is
5771 a single register; otherwise it costs one insn per word. */
5772 if (REG_P (XEXP (x, 0)))
5773 *total = COSTS_N_INSNS (1);
5775 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5782 /* Needs a libcall, so it costs about this. */
5783 *total = COSTS_N_INSNS (2);
5787 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5789 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
5797 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5799 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
5802 else if (mode == SImode)
5804 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
5805 /* Slightly disparage register shifts, but not by much. */
5806 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5807 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
5811 /* Needs a libcall. */
5812 *total = COSTS_N_INSNS (2);
5816 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5818 *total = COSTS_N_INSNS (1);
5824 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5825 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5827 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5828 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5829 || subcode1 == ROTATE || subcode1 == ROTATERT
5830 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5831 || subcode1 == ASHIFTRT)
5833 /* It's just the cost of the two operands. */
5838 *total = COSTS_N_INSNS (1);
5842 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5846 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5848 *total = COSTS_N_INSNS (1);
5852 /* A shift as a part of ADD costs nothing. */
5853 if (GET_CODE (XEXP (x, 0)) == MULT
5854 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5856 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
5857 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
5858 *total += rtx_cost (XEXP (x, 1), code, false);
5863 case AND: case XOR: case IOR:
5866 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5868 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5869 || subcode == LSHIFTRT || subcode == ASHIFTRT
5870 || (code == AND && subcode == NOT))
5872 /* It's just the cost of the two operands. */
5878 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5882 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5886 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5888 *total = COSTS_N_INSNS (1);
5894 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5903 if (cc_register (XEXP (x, 0), VOIDmode))
5906 *total = COSTS_N_INSNS (1);
5910 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5911 *total = COSTS_N_INSNS (1);
5913 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5918 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5920 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5921 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5924 *total += COSTS_N_INSNS (1);
5929 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5931 switch (GET_MODE (XEXP (x, 0)))
5934 *total += COSTS_N_INSNS (1);
5938 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5944 *total += COSTS_N_INSNS (2);
5949 *total += COSTS_N_INSNS (1);
5954 if (const_ok_for_arm (INTVAL (x)))
5955 /* A multiplication by a constant requires another instruction
5956 to load the constant to a register. */
5957 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
5959 else if (const_ok_for_arm (~INTVAL (x)))
5960 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5961 else if (const_ok_for_arm (-INTVAL (x)))
5963 if (outer_code == COMPARE || outer_code == PLUS
5964 || outer_code == MINUS)
5967 *total = COSTS_N_INSNS (1);
5970 *total = COSTS_N_INSNS (2);
5976 *total = COSTS_N_INSNS (2);
5980 *total = COSTS_N_INSNS (4);
5985 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
5986 cost of these slightly. */
5987 *total = COSTS_N_INSNS (1) + 1;
5991 if (mode != VOIDmode)
5992 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5994 *total = COSTS_N_INSNS (4); /* Who knows? */
5999 /* RTX costs. Dispatch to the size-optimized or per-core speed cost
functions as appropriate. */
6001 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
6005 return arm_size_rtx_costs (x, (enum rtx_code) code,
6006 (enum rtx_code) outer_code, total);
6008 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
6009 (enum rtx_code) outer_code,
6013 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
6014 supported on any "slowmul" cores, so it can be ignored. */
6017 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6018 int *total, bool speed)
6020 enum machine_mode mode = GET_MODE (x);
6024 *total = thumb1_rtx_costs (x, code, outer_code);
6031 if (GET_MODE_CLASS (mode) == MODE_FLOAT
6034 *total = COSTS_N_INSNS (20);
6038 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6040 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
6041 & (unsigned HOST_WIDE_INT) 0xffffffff);
6042 int cost, const_ok = const_ok_for_arm (i);
6043 int j, booth_unit_size;
6045 /* Tune as appropriate. */
6046 cost = const_ok ? 4 : 8;
6047 booth_unit_size = 2;
6048 for (j = 0; i && j < 32; j += booth_unit_size)
6050 i >>= booth_unit_size;
6054 *total = COSTS_N_INSNS (cost);
6055 *total += rtx_cost (XEXP (x, 0), code, speed);
6059 *total = COSTS_N_INSNS (20);
6063 return arm_rtx_costs_1 (x, outer_code, total, speed);
6068 /* RTX cost for cores with a fast multiply unit (M variants). */
6071 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6072 int *total, bool speed)
6074 enum machine_mode mode = GET_MODE (x);
6078 *total = thumb1_rtx_costs (x, code, outer_code);
6082 /* ??? Should Thumb-2 use different costs? */
6086 /* There is no point basing this on the tuning, since it is always the
6087 fast variant if it exists at all. */
6089 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6090 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6091 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6093 *total = COSTS_N_INSNS (2);
6100 *total = COSTS_N_INSNS (5);
6104 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6106 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
6107 & (unsigned HOST_WIDE_INT) 0xffffffff);
6108 int cost, const_ok = const_ok_for_arm (i);
6109 int j, booth_unit_size;
6111 /* Tune as appropriate. */
6112 cost = const_ok ? 4 : 8;
6113 booth_unit_size = 8;
6114 for (j = 0; i && j < 32; j += booth_unit_size)
6116 i >>= booth_unit_size;
6120 *total = COSTS_N_INSNS (cost);
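/* A sketch of the intent, assuming the loop above adds one unit of
cost per Booth step: a multiply by 255 (const_ok) starts at cost 4
and needs a single 8-bit step, giving COSTS_N_INSNS (5); a constant
using all 32 bits needs four steps. */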
6126 *total = COSTS_N_INSNS (4);
6130 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6132 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6134 *total = COSTS_N_INSNS (1);
6139 /* Requires a lib call */
6140 *total = COSTS_N_INSNS (20);
6144 return arm_rtx_costs_1 (x, outer_code, total, speed);
6149 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
6150 so it can be ignored. */
6153 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
6155 enum machine_mode mode = GET_MODE (x);
6159 *total = thumb1_rtx_costs (x, code, outer_code);
6166 if (GET_CODE (XEXP (x, 0)) != MULT)
6167 return arm_rtx_costs_1 (x, outer_code, total, speed);
6169 /* A COMPARE of a MULT is slow on XScale; the muls instruction
6170 will stall until the multiplication is complete. */
6171 *total = COSTS_N_INSNS (3);
6175 /* There is no point basing this on the tuning, since it is always the
6176 fast variant if it exists at all. */
6178 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6179 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6180 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6182 *total = COSTS_N_INSNS (2);
6189 *total = COSTS_N_INSNS (5);
6193 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6195 /* If operand 1 is a constant we can more accurately
6196 calculate the cost of the multiply. The multiplier can
6197 retire 15 bits on the first cycle and a further 12 on the
6198 second. We do, of course, have to load the constant into
6199 a register first. */
6200 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6201 /* There's a general overhead of one cycle. */
6203 unsigned HOST_WIDE_INT masked_const;
6208 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
6210 masked_const = i & 0xffff8000;
6211 if (masked_const != 0)
6214 masked_const = i & 0xf8000000;
6215 if (masked_const != 0)
6218 *total = COSTS_N_INSNS (cost);
6224 *total = COSTS_N_INSNS (3);
6228 /* Requires a lib call */
6229 *total = COSTS_N_INSNS (20);
6233 return arm_rtx_costs_1 (x, outer_code, total, speed);
6238 /* RTX costs for 9e (and later) cores. */
6241 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6242 int *total, bool speed)
6244 enum machine_mode mode = GET_MODE (x);
6251 *total = COSTS_N_INSNS (3);
6255 *total = thumb1_rtx_costs (x, code, outer_code);
6263 /* There is no point basing this on the tuning, since it is always the
6264 fast variant if it exists at all. */
6266 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6267 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6268 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6270 *total = COSTS_N_INSNS (2);
6277 *total = COSTS_N_INSNS (5);
6283 *total = COSTS_N_INSNS (2);
6287 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6289 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6291 *total = COSTS_N_INSNS (1);
6296 *total = COSTS_N_INSNS (20);
6300 return arm_rtx_costs_1 (x, outer_code, total, speed);
6303 /* All address computations that can be done are free, but rtx cost returns
6304 the same for practically all of them. So we weight the different types
6305 of address here in the order (most preferred first):
6306 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
6308 arm_arm_address_cost (rtx x)
6310 enum rtx_code c = GET_CODE (x);
6312 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
6314 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
6317 if (c == PLUS || c == MINUS)
6319 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6322 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
6332 arm_thumb_address_cost (rtx x)
6334 enum rtx_code c = GET_CODE (x);
6339 && GET_CODE (XEXP (x, 0)) == REG
6340 && GET_CODE (XEXP (x, 1)) == CONST_INT)
6347 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
6349 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
6353 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
6357 /* Some true dependencies can have a higher cost depending
6358 on precisely how certain input operands are used. */
6360 && REG_NOTE_KIND (link) == 0
6361 && recog_memoized (insn) >= 0
6362 && recog_memoized (dep) >= 0)
6364 int shift_opnum = get_attr_shift (insn);
6365 enum attr_type attr_type = get_attr_type (dep);
6367 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
6368 operand for INSN. If we have a shifted input operand and the
6369 instruction we depend on is another ALU instruction, then we may
6370 have to account for an additional stall. */
6371 if (shift_opnum != 0
6372 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
6374 rtx shifted_operand;
6377 /* Get the shifted operand. */
6378 extract_insn (insn);
6379 shifted_operand = recog_data.operand[shift_opnum];
6381 /* Iterate over all the operands in DEP. If we write an operand
6382 that overlaps with SHIFTED_OPERAND, then we have to increase the
6383 cost of this dependency. */
6385 preprocess_constraints ();
6386 for (opno = 0; opno < recog_data.n_operands; opno++)
6388 /* We can ignore strict inputs. */
6389 if (recog_data.operand_type[opno] == OP_IN)
6392 if (reg_overlap_mentioned_p (recog_data.operand[opno],
6399 /* XXX This is not strictly true for the FPA. */
6400 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
6401 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
6404 /* Call insns don't incur a stall, even if they follow a load. */
6405 if (REG_NOTE_KIND (link) == 0
6406 && GET_CODE (insn) == CALL_INSN)
6409 if ((i_pat = single_set (insn)) != NULL
6410 && GET_CODE (SET_SRC (i_pat)) == MEM
6411 && (d_pat = single_set (dep)) != NULL
6412 && GET_CODE (SET_DEST (d_pat)) == MEM)
6414 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
6415 /* This is a load after a store; there is no conflict if the load reads
6416 from a cached area. Assume that loads from the stack and from the
6417 constant pool are cached, and that others will miss. This is a
6418 hack. */
6420 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
6421 || reg_mentioned_p (stack_pointer_rtx, src_mem)
6422 || reg_mentioned_p (frame_pointer_rtx, src_mem)
6423 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
6430 static int fp_consts_inited = 0;
6432 /* Only zero is valid for VFP. Other values are also valid for FPA. */
6433 static const char * const strings_fp[8] =
6436 "4", "5", "0.5", "10"
6439 static REAL_VALUE_TYPE values_fp[8];
6442 init_fp_table (void)
6447 if (TARGET_VFP)
6448 fp_consts_inited = 1;
6449 else
6450 fp_consts_inited = 8;
6452 for (i = 0; i < fp_consts_inited; i++)
6454 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
6459 /* Return TRUE if rtx X is a valid immediate FP constant. */
6461 arm_const_double_rtx (rtx x)
6466 if (!fp_consts_inited)
6469 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6470 if (REAL_VALUE_MINUS_ZERO (r))
6473 for (i = 0; i < fp_consts_inited; i++)
6474 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6480 /* Return TRUE if rtx X is a valid immediate FPA constant. */
6482 neg_const_double_rtx_ok_for_fpa (rtx x)
6487 if (!fp_consts_inited)
6490 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6491 r = REAL_VALUE_NEGATE (r);
6492 if (REAL_VALUE_MINUS_ZERO (r))
6495 for (i = 0; i < 8; i++)
6496 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6503 /* VFPv3 has a fairly wide range of representable immediates, formed from
6504 "quarter-precision" floating-point values. These can be evaluated using this
6505 formula (with ^ for exponentiation):
6507 -1^s * n * 2^-r
6509 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
6510 16 <= n <= 31 and 0 <= r <= 7.
6512 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
6514 - A (most-significant) is the sign bit.
6515 - BCD are the exponent (encoded as r XOR 3).
6516 - EFGH are the mantissa (encoded as n - 16).
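/* A worked example: 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4,
so ABCDEFGH = 0 111 0000 = 0x70 (BCD = 4 XOR 3 = 7,
EFGH = 16 - 16 = 0). */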
6519 /* Return an integer index for a VFPv3 immediate operand X suitable for the
6520 fconst[sd] instruction, or -1 if X isn't suitable. */
6522 vfp3_const_double_index (rtx x)
6524 REAL_VALUE_TYPE r, m;
6526 unsigned HOST_WIDE_INT mantissa, mant_hi;
6527 unsigned HOST_WIDE_INT mask;
6528 HOST_WIDE_INT m1, m2;
6529 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6531 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
6534 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6536 /* We can't represent these things, so detect them first. */
6537 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
6540 /* Extract sign, exponent and mantissa. */
6541 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6542 r = REAL_VALUE_ABS (r);
6543 exponent = REAL_EXP (&r);
6544 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6545 highest (sign) bit, with a fixed binary point at bit point_pos.
6546 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
6547 bits for the mantissa, this may fail (low bits would be lost). */
6548 real_ldexp (&m, &r, point_pos - exponent);
6549 REAL_VALUE_TO_INT (&m1, &m2, m);
6553 /* If there are bits set in the low part of the mantissa, we can't
6554 represent this value. */
6558 /* Now make it so that mantissa contains the most-significant bits, and move
6559 the point_pos to indicate that the least-significant bits have been
6560 discarded. */
6561 point_pos -= HOST_BITS_PER_WIDE_INT;
6564 /* We can permit four significant bits of mantissa only, plus a high bit
6565 which is always 1. */
6566 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6567 if ((mantissa & mask) != 0)
6570 /* Now we know the mantissa is in range, chop off the unneeded bits. */
6571 mantissa >>= point_pos - 5;
6573 /* The mantissa may be zero. Disallow that case. (It's possible to load the
6574 floating-point immediate zero with Neon using an integer-zero load, but
6575 that case is handled elsewhere.) */
6579 gcc_assert (mantissa >= 16 && mantissa <= 31);
6581 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
6582 normalized significands are in the range [1, 2). (Our mantissa is shifted
6583 left 4 places at this point relative to normalized IEEE754 values). GCC
6584 internally uses [0.5, 1) (see real.c), so the exponent returned from
6585 REAL_EXP must be altered. */
6586 exponent = 5 - exponent;
6588 if (exponent < 0 || exponent > 7)
6591 /* Sign, mantissa and exponent are now in the correct form to plug into the
6592 formula described in the comment above. */
6593 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
6596 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
6598 vfp3_const_double_rtx (rtx x)
6603 return vfp3_const_double_index (x) != -1;
6606 /* Recognize immediates which can be used in various Neon instructions. Legal
6607 immediates are described by the following table (for VMVN variants, the
6608 bitwise inverse of the constant shown is recognized. In either case, VMOV
6609 is output and the correct instruction to use for a given constant is chosen
6610 by the assembler). The constant shown is replicated across all elements of
6611 the destination vector.
6613 insn elems variant constant (binary)
6614 ---- ----- ------- -----------------
6615 vmov i32 0 00000000 00000000 00000000 abcdefgh
6616 vmov i32 1 00000000 00000000 abcdefgh 00000000
6617 vmov i32 2 00000000 abcdefgh 00000000 00000000
6618 vmov i32 3 abcdefgh 00000000 00000000 00000000
6619 vmov i16 4 00000000 abcdefgh
6620 vmov i16 5 abcdefgh 00000000
6621 vmvn i32 6 00000000 00000000 00000000 abcdefgh
6622 vmvn i32 7 00000000 00000000 abcdefgh 00000000
6623 vmvn i32 8 00000000 abcdefgh 00000000 00000000
6624 vmvn i32 9 abcdefgh 00000000 00000000 00000000
6625 vmvn i16 10 00000000 abcdefgh
6626 vmvn i16 11 abcdefgh 00000000
6627 vmov i32 12 00000000 00000000 abcdefgh 11111111
6628 vmvn i32 13 00000000 00000000 abcdefgh 11111111
6629 vmov i32 14 00000000 abcdefgh 11111111 11111111
6630 vmvn i32 15 00000000 abcdefgh 11111111 11111111
6631 vmov i8 16 abcdefgh
6632 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
6633 eeeeeeee ffffffff gggggggg hhhhhhhh
6634 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
6636 For case 18, B = !b. Representable values are exactly those accepted by
6637 vfp3_const_double_index, but are output as floating-point numbers rather
6640 Variants 0-5 (inclusive) may also be used as immediates for the second
6641 operand of VORR/VBIC instructions.
6643 The INVERSE argument causes the bitwise inverse of the given operand to be
6644 recognized instead (used for recognizing legal immediates for the VAND/VORN
6645 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
6646 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
6647 output, rather than the real insns vbic/vorr).
6649 INVERSE makes no difference to the recognition of float vectors.
6651 The return value is the variant of immediate as shown in the above table, or
6652 -1 if the given value doesn't match any of the listed patterns.
6655 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6656 rtx *modconst, int *elementwidth)
6658 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
6660 for (i = 0; i < idx; i += (STRIDE)) \
6665 immtype = (CLASS); \
6666 elsize = (ELSIZE); \
6670 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6671 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6672 unsigned char bytes[16];
6673 int immtype = -1, matches;
6674 unsigned int invmask = inverse ? 0xff : 0;
6676 /* Vectors of float constants. */
6677 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6679 rtx el0 = CONST_VECTOR_ELT (op, 0);
6682 if (!vfp3_const_double_rtx (el0))
6685 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
6687 for (i = 1; i < n_elts; i++)
6689 rtx elt = CONST_VECTOR_ELT (op, i);
6692 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
6694 if (!REAL_VALUES_EQUAL (r0, re))
6699 *modconst = CONST_VECTOR_ELT (op, 0);
6707 /* Splat vector constant out into a byte vector. */
6708 for (i = 0; i < n_elts; i++)
6710 rtx el = CONST_VECTOR_ELT (op, i);
6711 unsigned HOST_WIDE_INT elpart;
6712 unsigned int part, parts;
6714 if (GET_CODE (el) == CONST_INT)
6716 elpart = INTVAL (el);
6719 else if (GET_CODE (el) == CONST_DOUBLE)
6721 elpart = CONST_DOUBLE_LOW (el);
6727 for (part = 0; part < parts; part++)
6730 for (byte = 0; byte < innersize; byte++)
6732 bytes[idx++] = (elpart & 0xff) ^ invmask;
6733 elpart >>= BITS_PER_UNIT;
6735 if (GET_CODE (el) == CONST_DOUBLE)
6736 elpart = CONST_DOUBLE_HIGH (el);
6741 gcc_assert (idx == GET_MODE_SIZE (mode));
6745 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6746 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6748 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6749 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6751 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6752 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6754 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6755 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6757 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6759 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6761 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6762 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6764 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6765 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6767 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6768 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6770 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6771 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6773 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6775 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6777 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6778 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6780 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6781 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6783 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6784 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6786 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6787 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6789 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6791 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6792 && bytes[i] == bytes[(i + 8) % idx]);
6800 *elementwidth = elsize;
6804 unsigned HOST_WIDE_INT imm = 0;
6806 /* Un-invert bytes of recognized vector, if necessary. */
6808 for (i = 0; i < idx; i++)
6809 bytes[i] ^= invmask;
6813 /* FIXME: Broken on 32-bit H_W_I hosts. */
6814 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6816 for (i = 0; i < 8; i++)
6817 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6818 << (i * BITS_PER_UNIT);
6820 *modconst = GEN_INT (imm);
6824 unsigned HOST_WIDE_INT imm = 0;
6826 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6827 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6829 *modconst = GEN_INT (imm);
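/* A worked example of the matching above (illustrative only): a V4SImode
   constant with every element equal to 0x0000ab00 splats, little-endian,
   into the byte vector 00 ab 00 00 / 00 ab 00 00 / ...  That satisfies the
   variant 1 test (bytes[i] == 0, bytes[i + 1] == bytes[1], remaining bytes
   zero), so the function returns 1 with *ELEMENTWIDTH = 32 and *MODCONST =
   0xab00, eventually emitted as something like "vmov.i32 qN, #0x0000ab00".  */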
6837 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6838 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6839 float elements), and a modified constant (whatever should be output for a
6840 VMOV) in *MODCONST. */
6843 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6844 rtx *modconst, int *elementwidth)
6848 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6854 *modconst = tmpconst;
6857 *elementwidth = tmpwidth;
6862 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6863 the immediate is valid, write a constant suitable for using as an operand
6864 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6865 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6868 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6869 rtx *modconst, int *elementwidth)
6873 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6875 if (retval < 0 || retval > 5)
6879 *modconst = tmpconst;
6882 *elementwidth = tmpwidth;
6887 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
6891 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6892 int inverse, int quad)
6894 int width, is_valid;
6895 static char templ[40];
6897 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6899 gcc_assert (is_valid != 0);
6902 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6904 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
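/* For example, with MNEM "vorr", a 32-bit element width and QUAD nonzero,
   the template built above is "vorr.i32\t%q0, %2".  */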
6909 /* Output a sequence of pairwise operations to implement a reduction.
6910 NOTE: We do "too much work" here, because pairwise operations work on two
6911 registers' worth of operands in one go. Unfortunately, I don't think we can
6912 exploit those extra calculations to do the full operation in fewer steps.
6913 Although all vector elements of the result but the first are ignored, we
6914 actually calculate the same result in each of the elements. An alternative
6915 such as initially loading a vector with zero to use as each of the second
6916 operands would use up an additional register and take an extra instruction,
6917 for no particular gain. */
6920 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6921 rtx (*reduc) (rtx, rtx, rtx))
6923 enum machine_mode inner = GET_MODE_INNER (mode);
6924 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6927 for (i = parts / 2; i >= 1; i /= 2)
6929 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6930 emit_insn (reduc (dest, tmpsum, tmpsum));
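/* A scalar model of the halving performed above (illustrative only, not
   compiled): each pass combines adjacent lanes, so a vector of PARTS
   elements is reduced in log2(PARTS) passes.  */
#if 0
static int
pairwise_reduce_sketch (int *lanes, unsigned int parts)
{
  unsigned int i, n;

  for (n = parts; n > 1; n /= 2)
    for (i = 0; i < n / 2; i++)
      lanes[i] = lanes[2 * i] + lanes[2 * i + 1];

  return lanes[0];
}
#endif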
6935 /* Initialize a vector with non-constant elements. FIXME: We can do better
6936 than the current implementation (building a vector on the stack and then
6937 loading it) in many cases. See rs6000.c. */
6940 neon_expand_vector_init (rtx target, rtx vals)
6942 enum machine_mode mode = GET_MODE (target);
6943 enum machine_mode inner = GET_MODE_INNER (mode);
6944 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6947 gcc_assert (VECTOR_MODE_P (mode));
6949 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6950 for (i = 0; i < n_elts; i++)
6951 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6952 XVECEXP (vals, 0, i));
6954 emit_move_insn (target, mem);
6957 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6958 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6959 reported source locations are bogus. */
6962 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6967 gcc_assert (GET_CODE (operand) == CONST_INT);
6969 lane = INTVAL (operand);
6971 if (lane < low || lane >= high)
6975 /* Bounds-check lanes. */
6978 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6980 bounds_check (operand, low, high, "lane out of range");
6983 /* Bounds-check constants. */
6986 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6988 bounds_check (operand, low, high, "constant out of range");
6992 neon_element_bits (enum machine_mode mode)
6995 return GET_MODE_BITSIZE (mode);
6997 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
7001 /* Predicates for `match_operand' and `match_operator'. */
7003 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
7005 cirrus_memory_offset (rtx op)
7007 /* Reject eliminable registers. */
7008 if (! (reload_in_progress || reload_completed)
7009 && ( reg_mentioned_p (frame_pointer_rtx, op)
7010 || reg_mentioned_p (arg_pointer_rtx, op)
7011 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7012 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7013 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7014 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7017 if (GET_CODE (op) == MEM)
7023 /* Match: (mem (reg)). */
7024 if (GET_CODE (ind) == REG)
7030 if (GET_CODE (ind) == PLUS
7031 && GET_CODE (XEXP (ind, 0)) == REG
7032 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7033 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
7040 /* Return TRUE if OP is a valid coprocessor memory address pattern.
7041 WB is true if full writeback address modes are allowed and is false
7042 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
7046 arm_coproc_mem_operand (rtx op, bool wb)
7050 /* Reject eliminable registers. */
7051 if (! (reload_in_progress || reload_completed)
7052 && ( reg_mentioned_p (frame_pointer_rtx, op)
7053 || reg_mentioned_p (arg_pointer_rtx, op)
7054 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7055 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7056 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7057 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7060 /* Constants are converted into offsets from labels. */
7061 if (GET_CODE (op) != MEM)
7066 if (reload_completed
7067 && (GET_CODE (ind) == LABEL_REF
7068 || (GET_CODE (ind) == CONST
7069 && GET_CODE (XEXP (ind, 0)) == PLUS
7070 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7071 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7074 /* Match: (mem (reg)). */
7075 if (GET_CODE (ind) == REG)
7076 return arm_address_register_rtx_p (ind, 0);
7078 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
7079 acceptable in any case (subject to verification by
7080 arm_address_register_rtx_p). We need WB to be true to accept
7081 PRE_INC and POST_DEC. */
7082 if (GET_CODE (ind) == POST_INC
7083 || GET_CODE (ind) == PRE_DEC
7085 && (GET_CODE (ind) == PRE_INC
7086 || GET_CODE (ind) == POST_DEC)))
7087 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7090 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
7091 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
7092 && GET_CODE (XEXP (ind, 1)) == PLUS
7093 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
7094 ind = XEXP (ind, 1);
7099 if (GET_CODE (ind) == PLUS
7100 && GET_CODE (XEXP (ind, 0)) == REG
7101 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7102 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7103 && INTVAL (XEXP (ind, 1)) > -1024
7104 && INTVAL (XEXP (ind, 1)) < 1024
7105 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7111 /* Return TRUE if OP is a memory operand which we can load or store a vector
7112 to/from. TYPE is one of the following values:
7113 0 - Vector load/store (vldr)
7114 1 - Core registers (ldm)
7115 2 - Element/structure loads (vld1)
7118 neon_vector_mem_operand (rtx op, int type)
7122 /* Reject eliminable registers. */
7123 if (! (reload_in_progress || reload_completed)
7124 && ( reg_mentioned_p (frame_pointer_rtx, op)
7125 || reg_mentioned_p (arg_pointer_rtx, op)
7126 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7127 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7128 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7129 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7132 /* Constants are converted into offsets from labels. */
7133 if (GET_CODE (op) != MEM)
7138 if (reload_completed
7139 && (GET_CODE (ind) == LABEL_REF
7140 || (GET_CODE (ind) == CONST
7141 && GET_CODE (XEXP (ind, 0)) == PLUS
7142 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7143 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7146 /* Match: (mem (reg)). */
7147 if (GET_CODE (ind) == REG)
7148 return arm_address_register_rtx_p (ind, 0);
7150 /* Allow post-increment with Neon registers. */
7151 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
7152 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7154 /* FIXME: vld1 allows register post-modify. */
7160 && GET_CODE (ind) == PLUS
7161 && GET_CODE (XEXP (ind, 0)) == REG
7162 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7163 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7164 && INTVAL (XEXP (ind, 1)) > -1024
7165 && INTVAL (XEXP (ind, 1)) < 1016
7166 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7172 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
7175 neon_struct_mem_operand (rtx op)
7179 /* Reject eliminable registers. */
7180 if (! (reload_in_progress || reload_completed)
7181 && ( reg_mentioned_p (frame_pointer_rtx, op)
7182 || reg_mentioned_p (arg_pointer_rtx, op)
7183 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7184 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7185 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7186 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7189 /* Constants are converted into offsets from labels. */
7190 if (GET_CODE (op) != MEM)
7195 if (reload_completed
7196 && (GET_CODE (ind) == LABEL_REF
7197 || (GET_CODE (ind) == CONST
7198 && GET_CODE (XEXP (ind, 0)) == PLUS
7199 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7200 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7203 /* Match: (mem (reg)). */
7204 if (GET_CODE (ind) == REG)
7205 return arm_address_register_rtx_p (ind, 0);
7210 /* Return true if X is a register that will be eliminated later on. */
7212 arm_eliminable_register (rtx x)
7214 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
7215 || REGNO (x) == ARG_POINTER_REGNUM
7216 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
7217 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
7220 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
7221 coprocessor registers. Otherwise return NO_REGS. */
7224 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
7228 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
7230 return GENERAL_REGS;
7234 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7235 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7236 && neon_vector_mem_operand (x, 0))
7239 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
7242 return GENERAL_REGS;
7245 /* Values which must be returned in the most-significant end of the return register. */
7249 arm_return_in_msb (const_tree valtype)
7251 return (TARGET_AAPCS_BASED
7253 && (AGGREGATE_TYPE_P (valtype)
7254 || TREE_CODE (valtype) == COMPLEX_TYPE));
7257 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
7258 Used by the Cirrus Maverick code, which has to work around
7259 a hardware bug triggered by such instructions. */
7261 arm_memory_load_p (rtx insn)
7263 rtx body, lhs, rhs;
7265 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
7268 body = PATTERN (insn);
7270 if (GET_CODE (body) != SET)
7273 lhs = XEXP (body, 0);
7274 rhs = XEXP (body, 1);
7276 lhs = REG_OR_SUBREG_RTX (lhs);
7278 /* If the destination is not a general purpose
7279 register we do not have to worry. */
7280 if (GET_CODE (lhs) != REG
7281 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
7284 /* As well as loads from memory we also have to react
7285 to loads of invalid constants which will be turned
7286 into loads from the minipool. */
7287 return (GET_CODE (rhs) == MEM
7288 || GET_CODE (rhs) == SYMBOL_REF
7289 || note_invalid_constants (insn, -1, false));
7292 /* Return TRUE if INSN is a Cirrus instruction. */
7294 arm_cirrus_insn_p (rtx insn)
7296 enum attr_cirrus attr;
7298 /* get_attr cannot accept USE or CLOBBER. */
7300 || GET_CODE (insn) != INSN
7301 || GET_CODE (PATTERN (insn)) == USE
7302 || GET_CODE (PATTERN (insn)) == CLOBBER)
7305 attr = get_attr_cirrus (insn);
7307 return attr != CIRRUS_NOT;
7310 /* Cirrus reorg for invalid instruction combinations. */
7312 cirrus_reorg (rtx first)
7314 enum attr_cirrus attr;
7315 rtx body = PATTERN (first);
7319 /* Any branch must be followed by two non-Cirrus instructions. */
7320 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
7323 t = next_nonnote_insn (first);
7325 if (arm_cirrus_insn_p (t))
7328 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7332 emit_insn_after (gen_nop (), first);
7337 /* (float (blah)) is in parallel with a clobber. */
7338 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
7339 body = XVECEXP (body, 0, 0);
7341 if (GET_CODE (body) == SET)
7343 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
7345 /* cfldrd, cfldr64, cfstrd, cfstr64 must
7346 be followed by a non-Cirrus insn. */
7347 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
7349 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
7350 emit_insn_after (gen_nop (), first);
7354 else if (arm_memory_load_p (first))
7356 unsigned int arm_regno;
7358 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
7359 ldr/cfmv64hr combination where the Rd field is the same
7360 in both instructions must be split with a non-Cirrus
7367 /* Get Arm register number for ldr insn. */
7368 if (GET_CODE (lhs) == REG)
7369 arm_regno = REGNO (lhs);
7372 gcc_assert (GET_CODE (rhs) == REG);
7373 arm_regno = REGNO (rhs);
7377 first = next_nonnote_insn (first);
7379 if (! arm_cirrus_insn_p (first))
7382 body = PATTERN (first);
7384 /* (float (blah)) is in parallel with a clobber. */
7385 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
7386 body = XVECEXP (body, 0, 0);
7388 if (GET_CODE (body) == FLOAT)
7389 body = XEXP (body, 0);
7391 if (get_attr_cirrus (first) == CIRRUS_MOVE
7392 && GET_CODE (XEXP (body, 1)) == REG
7393 && arm_regno == REGNO (XEXP (body, 1)))
7394 emit_insn_after (gen_nop (), first);
7400 /* get_attr cannot accept USE or CLOBBER. */
7402 || GET_CODE (first) != INSN
7403 || GET_CODE (PATTERN (first)) == USE
7404 || GET_CODE (PATTERN (first)) == CLOBBER)
7407 attr = get_attr_cirrus (first);
7409 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
7410 must be followed by a non-coprocessor instruction. */
7411 if (attr == CIRRUS_COMPARE)
7415 t = next_nonnote_insn (first);
7417 if (arm_cirrus_insn_p (t))
7420 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7424 emit_insn_after (gen_nop (), first);
7430 /* Return TRUE if X references a SYMBOL_REF. */
7432 symbol_mentioned_p (rtx x)
7437 if (GET_CODE (x) == SYMBOL_REF)
7440 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
7441 are constant offsets, not symbols. */
7442 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7445 fmt = GET_RTX_FORMAT (GET_CODE (x));
7447 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7453 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7454 if (symbol_mentioned_p (XVECEXP (x, i, j)))
7457 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
7464 /* Return TRUE if X references a LABEL_REF. */
7466 label_mentioned_p (rtx x)
7471 if (GET_CODE (x) == LABEL_REF)
7474 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
7475 instruction, but they are constant offsets, not symbols. */
7476 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7479 fmt = GET_RTX_FORMAT (GET_CODE (x));
7480 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7486 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7487 if (label_mentioned_p (XVECEXP (x, i, j)))
7490 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
7498 tls_mentioned_p (rtx x)
7500 switch (GET_CODE (x))
7503 return tls_mentioned_p (XEXP (x, 0));
7506 if (XINT (x, 1) == UNSPEC_TLS)
7514 /* Must not copy a SET whose source operand is PC-relative. */
7517 arm_cannot_copy_insn_p (rtx insn)
7519 rtx pat = PATTERN (insn);
7521 if (GET_CODE (pat) == SET)
7523 rtx rhs = SET_SRC (pat);
7525 if (GET_CODE (rhs) == UNSPEC
7526 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
7529 if (GET_CODE (rhs) == MEM
7530 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
7531 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
7541 enum rtx_code code = GET_CODE (x);
7558 /* Return 1 if memory locations are adjacent. */
7560 adjacent_mem_locations (rtx a, rtx b)
7562 /* We don't guarantee to preserve the order of these memory refs. */
7563 if (volatile_refs_p (a) || volatile_refs_p (b))
7566 if ((GET_CODE (XEXP (a, 0)) == REG
7567 || (GET_CODE (XEXP (a, 0)) == PLUS
7568 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
7569 && (GET_CODE (XEXP (b, 0)) == REG
7570 || (GET_CODE (XEXP (b, 0)) == PLUS
7571 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
7573 HOST_WIDE_INT val0 = 0, val1 = 0;
7577 if (GET_CODE (XEXP (a, 0)) == PLUS)
7579 reg0 = XEXP (XEXP (a, 0), 0);
7580 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
7585 if (GET_CODE (XEXP (b, 0)) == PLUS)
7587 reg1 = XEXP (XEXP (b, 0), 0);
7588 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
7593 /* Don't accept any offset that will require multiple
7594 instructions to handle, since this would cause the
7595 arith_adjacentmem pattern to output an overlong sequence. */
7596 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
7599 /* Don't allow an eliminable register: register elimination can make
7600 the offset too large. */
7601 if (arm_eliminable_register (reg0))
7604 val_diff = val1 - val0;
7608 /* If the target has load delay slots, then there's no benefit
7609 to using an ldm instruction unless the offset is zero and
7610 we are optimizing for size. */
7611 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
7612 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
7613 && (val_diff == 4 || val_diff == -4));
7616 return ((REGNO (reg0) == REGNO (reg1))
7617 && (val_diff == 4 || val_diff == -4));
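/* For example (illustrative): on a core without load delay slots, MEMs
   addressed by [r4] and [r4, #4] are adjacent (same base, offsets 0 and 4),
   whereas [r4] and [r5, #4] are not, since the base registers differ.  */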
7624 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7625 HOST_WIDE_INT *load_offset)
7627 int unsorted_regs[4];
7628 HOST_WIDE_INT unsorted_offsets[4];
7633 /* Can only handle 2, 3, or 4 insns at present,
7634 though could be easily extended if required. */
7635 gcc_assert (nops >= 2 && nops <= 4);
7637 memset (order, 0, 4 * sizeof (int));
7639 /* Loop over the operands and check that the memory references are
7640 suitable (i.e. immediate offsets from the same base register). At
7641 the same time, extract the target register, and the memory offsets. */
7643 for (i = 0; i < nops; i++)
7648 /* Convert a subreg of a mem into the mem itself. */
7649 if (GET_CODE (operands[nops + i]) == SUBREG)
7650 operands[nops + i] = alter_subreg (operands + (nops + i));
7652 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7654 /* Don't reorder volatile memory references; it doesn't seem worth
7655 looking for the case where the order is ok anyway. */
7656 if (MEM_VOLATILE_P (operands[nops + i]))
7659 offset = const0_rtx;
7661 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7662 || (GET_CODE (reg) == SUBREG
7663 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7664 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7665 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7667 || (GET_CODE (reg) == SUBREG
7668 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7669 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7674 base_reg = REGNO (reg);
7675 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7676 ? REGNO (operands[i])
7677 : REGNO (SUBREG_REG (operands[i])));
7682 if (base_reg != (int) REGNO (reg))
7683 /* Not addressed from the same base register. */
7686 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7687 ? REGNO (operands[i])
7688 : REGNO (SUBREG_REG (operands[i])));
7689 if (unsorted_regs[i] < unsorted_regs[order[0]])
7693 /* If it isn't an integer register, or if it overwrites the
7694 base register but isn't the last insn in the list, then
7695 we can't do this. */
7696 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
7697 || (i != nops - 1 && unsorted_regs[i] == base_reg))
7700 unsorted_offsets[i] = INTVAL (offset);
7703 /* Not a suitable memory address. */
7707 /* All the useful information has now been extracted from the
7708 operands into unsorted_regs and unsorted_offsets; additionally,
7709 order[0] has been set to the lowest numbered register in the
7710 list. Sort the registers into order, and check that the memory
7711 offsets are ascending and adjacent. */
7713 for (i = 1; i < nops; i++)
7717 order[i] = order[i - 1];
7718 for (j = 0; j < nops; j++)
7719 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7720 && (order[i] == order[i - 1]
7721 || unsorted_regs[j] < unsorted_regs[order[i]]))
7724 /* Have we found a suitable register? If not, one must be used more than once. */
7726 if (order[i] == order[i - 1])
7729 /* Is the memory address adjacent and ascending? */
7730 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7738 for (i = 0; i < nops; i++)
7739 regs[i] = unsorted_regs[order[i]];
7741 *load_offset = unsorted_offsets[order[0]];
7744 if (unsorted_offsets[order[0]] == 0)
7745 return 1; /* ldmia */
7747 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7748 return 2; /* ldmib */
7750 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7751 return 3; /* ldmda */
7753 if (unsorted_offsets[order[nops - 1]] == -4)
7754 return 4; /* ldmdb */
7756 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7757 if the offset isn't small enough. The reason 2 ldrs are faster
7758 is because these ARMs are able to do more than one cache access
7759 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7760 whilst the ARM8 has a double bandwidth cache. This means that
7761 these cores can do both an instruction fetch and a data fetch in
7762 a single cycle, so the trick of calculating the address into a
7763 scratch register (one of the result regs) and then doing a load
7764 multiple actually becomes slower (and no smaller in code size).
7765 That is the transformation
7767 ldr rd1, [rbase + offset]
7768 ldr rd2, [rbase + offset + 4]
7772 add rd1, rbase, offset
7773 ldmia rd1, {rd1, rd2}
7775 produces worse code -- '3 cycles + any stalls on rd2' instead of
7776 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7777 access per cycle, the first sequence could never complete in less
7778 than 6 cycles, whereas the ldm sequence would only take 5 and
7779 would make better use of sequential accesses if not hitting the cache.
7782 We cheat here and test 'arm_ld_sched' which we currently know to
7783 only be true for the ARM8, ARM9 and StrongARM. If this ever
7784 changes, then the test below needs to be reworked. */
7785 if (nops == 2 && arm_ld_sched)
7788 /* Can't do it without setting up the offset; only do this if it takes
7789 no more than one insn. */
7790 return (const_ok_for_arm (unsorted_offsets[order[0]])
7791 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
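/* For example (illustrative): the operand pair

	ldr	r4, [r12]
	ldr	r5, [r12, #4]

   yields regs = {4, 5}, *base = 12, *load_offset = 0 and a return value of
   1 (ldmia), letting emit_ldm_seq below produce a single
   "ldmia r12, {r4, r5}".  */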
7795 emit_ldm_seq (rtx *operands, int nops)
7799 HOST_WIDE_INT offset;
7803 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7806 strcpy (buf, "ldm%(ia%)\t");
7810 strcpy (buf, "ldm%(ib%)\t");
7814 strcpy (buf, "ldm%(da%)\t");
7818 strcpy (buf, "ldm%(db%)\t");
7823 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7824 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7827 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7828 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7830 output_asm_insn (buf, operands);
7832 strcpy (buf, "ldm%(ia%)\t");
7839 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7840 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7842 for (i = 1; i < nops; i++)
7843 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7844 reg_names[regs[i]]);
7846 strcat (buf, "}\t%@ phole ldm");
7848 output_asm_insn (buf, operands);
7853 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7854 HOST_WIDE_INT * load_offset)
7856 int unsorted_regs[4];
7857 HOST_WIDE_INT unsorted_offsets[4];
7862 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7863 extended if required. */
7864 gcc_assert (nops >= 2 && nops <= 4);
7866 memset (order, 0, 4 * sizeof (int));
7868 /* Loop over the operands and check that the memory references are
7869 suitable (i.e. immediate offsets from the same base register). At
7870 the same time, extract the target register, and the memory offsets. */
7872 for (i = 0; i < nops; i++)
7877 /* Convert a subreg of a mem into the mem itself. */
7878 if (GET_CODE (operands[nops + i]) == SUBREG)
7879 operands[nops + i] = alter_subreg (operands + (nops + i));
7881 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7883 /* Don't reorder volatile memory references; it doesn't seem worth
7884 looking for the case where the order is ok anyway. */
7885 if (MEM_VOLATILE_P (operands[nops + i]))
7888 offset = const0_rtx;
7890 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7891 || (GET_CODE (reg) == SUBREG
7892 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7893 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7894 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7896 || (GET_CODE (reg) == SUBREG
7897 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7898 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7903 base_reg = REGNO (reg);
7904 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7905 ? REGNO (operands[i])
7906 : REGNO (SUBREG_REG (operands[i])));
7911 if (base_reg != (int) REGNO (reg))
7912 /* Not addressed from the same base register. */
7915 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7916 ? REGNO (operands[i])
7917 : REGNO (SUBREG_REG (operands[i])));
7918 if (unsorted_regs[i] < unsorted_regs[order[0]])
7922 /* If it isn't an integer register, then we can't do this. */
7923 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7926 unsorted_offsets[i] = INTVAL (offset);
7929 /* Not a suitable memory address. */
7933 /* All the useful information has now been extracted from the
7934 operands into unsorted_regs and unsorted_offsets; additionally,
7935 order[0] has been set to the lowest numbered register in the
7936 list. Sort the registers into order, and check that the memory
7937 offsets are ascending and adjacent. */
7939 for (i = 1; i < nops; i++)
7943 order[i] = order[i - 1];
7944 for (j = 0; j < nops; j++)
7945 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7946 && (order[i] == order[i - 1]
7947 || unsorted_regs[j] < unsorted_regs[order[i]]))
7950 /* Have we found a suitable register? If not, one must be used more than once. */
7952 if (order[i] == order[i - 1])
7955 /* Is the memory address adjacent and ascending? */
7956 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7964 for (i = 0; i < nops; i++)
7965 regs[i] = unsorted_regs[order[i]];
7967 *load_offset = unsorted_offsets[order[0]];
7970 if (unsorted_offsets[order[0]] == 0)
7971 return 1; /* stmia */
7973 if (unsorted_offsets[order[0]] == 4)
7974 return 2; /* stmib */
7976 if (unsorted_offsets[order[nops - 1]] == 0)
7977 return 3; /* stmda */
7979 if (unsorted_offsets[order[nops - 1]] == -4)
7980 return 4; /* stmdb */
7986 emit_stm_seq (rtx *operands, int nops)
7990 HOST_WIDE_INT offset;
7994 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7997 strcpy (buf, "stm%(ia%)\t");
8001 strcpy (buf, "stm%(ib%)\t");
8005 strcpy (buf, "stm%(da%)\t");
8009 strcpy (buf, "stm%(db%)\t");
8016 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
8017 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
8019 for (i = 1; i < nops; i++)
8020 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
8021 reg_names[regs[i]]);
8023 strcat (buf, "}\t%@ phole stm");
8025 output_asm_insn (buf, operands);
8029 /* Routines for use in generating RTL. */
8032 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
8033 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
8035 HOST_WIDE_INT offset = *offsetp;
8038 int sign = up ? 1 : -1;
8041 /* XScale has load-store double instructions, but they have stricter
8042 alignment requirements than load-store multiple, so we cannot
8045 For XScale ldm requires 2 + NREGS cycles to complete and blocks
8046 the pipeline until completion.
8054 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
8063 Best case ldr will always win. However, the more ldr instructions
8064 we issue, the less likely we are to be able to schedule them well.
8065 Using ldr instructions also increases code size.
8067 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
8068 for counts of 3 or 4 regs. */
8069 if (arm_tune_xscale && count <= 2 && ! optimize_size)
8075 for (i = 0; i < count; i++)
8077 addr = plus_constant (from, i * 4 * sign);
8078 mem = adjust_automodify_address (basemem, SImode, addr, offset);
8079 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
8085 emit_move_insn (from, plus_constant (from, count * 4 * sign));
8095 result = gen_rtx_PARALLEL (VOIDmode,
8096 rtvec_alloc (count + (write_back ? 1 : 0)));
8099 XVECEXP (result, 0, 0)
8100 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
8105 for (j = 0; i < count; i++, j++)
8107 addr = plus_constant (from, j * 4 * sign);
8108 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8109 XVECEXP (result, 0, i)
8110 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
8121 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
8122 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
8124 HOST_WIDE_INT offset = *offsetp;
8127 int sign = up ? 1 : -1;
8130 /* See arm_gen_load_multiple for discussion of
8131 the pros/cons of ldm/stm usage for XScale. */
8132 if (arm_tune_xscale && count <= 2 && ! optimize_size)
8138 for (i = 0; i < count; i++)
8140 addr = plus_constant (to, i * 4 * sign);
8141 mem = adjust_automodify_address (basemem, SImode, addr, offset);
8142 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
8148 emit_move_insn (to, plus_constant (to, count * 4 * sign));
8158 result = gen_rtx_PARALLEL (VOIDmode,
8159 rtvec_alloc (count + (write_back ? 1 : 0)));
8162 XVECEXP (result, 0, 0)
8163 = gen_rtx_SET (VOIDmode, to,
8164 plus_constant (to, count * 4 * sign));
8169 for (j = 0; i < count; i++, j++)
8171 addr = plus_constant (to, j * 4 * sign);
8172 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8173 XVECEXP (result, 0, i)
8174 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
8185 arm_gen_movmemqi (rtx *operands)
8187 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
8188 HOST_WIDE_INT srcoffset, dstoffset;
8190 rtx src, dst, srcbase, dstbase;
8191 rtx part_bytes_reg = NULL;
8194 if (GET_CODE (operands[2]) != CONST_INT
8195 || GET_CODE (operands[3]) != CONST_INT
8196 || INTVAL (operands[2]) > 64
8197 || INTVAL (operands[3]) & 3)
8200 dstbase = operands[0];
8201 srcbase = operands[1];
8203 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
8204 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
8206 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
8207 out_words_to_go = INTVAL (operands[2]) / 4;
8208 last_bytes = INTVAL (operands[2]) & 3;
8209 dstoffset = srcoffset = 0;
8211 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
8212 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
8214 for (i = 0; in_words_to_go >= 2; i+=4)
8216 if (in_words_to_go > 4)
8217 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
8218 srcbase, &srcoffset));
8220 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
8221 FALSE, srcbase, &srcoffset));
8223 if (out_words_to_go)
8225 if (out_words_to_go > 4)
8226 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
8227 dstbase, &dstoffset));
8228 else if (out_words_to_go != 1)
8229 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
8233 dstbase, &dstoffset));
8236 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8237 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
8238 if (last_bytes != 0)
8240 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
8246 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
8247 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
8250 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
8251 if (out_words_to_go)
8255 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8256 sreg = copy_to_reg (mem);
8258 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8259 emit_move_insn (mem, sreg);
8262 gcc_assert (!in_words_to_go); /* Sanity check */
8267 gcc_assert (in_words_to_go > 0);
8269 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8270 part_bytes_reg = copy_to_mode_reg (SImode, mem);
8273 gcc_assert (!last_bytes || part_bytes_reg);
8275 if (BYTES_BIG_ENDIAN && last_bytes)
8277 rtx tmp = gen_reg_rtx (SImode);
8279 /* The bytes we want are in the top end of the word. */
8280 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
8281 GEN_INT (8 * (4 - last_bytes))));
8282 part_bytes_reg = tmp;
8286 mem = adjust_automodify_address (dstbase, QImode,
8287 plus_constant (dst, last_bytes - 1),
8288 dstoffset + last_bytes - 1);
8289 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8293 tmp = gen_reg_rtx (SImode);
8294 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
8295 part_bytes_reg = tmp;
8304 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
8305 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
8309 rtx tmp = gen_reg_rtx (SImode);
8310 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
8311 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
8312 part_bytes_reg = tmp;
8319 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
8320 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8327 /* Select a dominance comparison mode if possible for a test of the general
8328 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
8329 COND_OR == DOM_CC_X_AND_Y => (X && Y)
8330 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
8331 COND_OR == DOM_CC_X_OR_Y => (X || Y)
8332 In all cases OP will be either EQ or NE, but we don't need to know which
8333 here. If we are unable to support a dominance comparison we return
8334 CC mode. This will then fail to match for the RTL expressions that
8335 generate this call. */
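/* For example (illustrative): a C condition like (x == 0 && y == 0) can
   reach this function as (EQ (AND (EQ x 0) (EQ y 0)) (const_int 0)) with
   COND_OR == DOM_CC_X_AND_Y; both sub-comparisons are EQ, so CC_DEQmode is
   returned and the backend can emit a conditional-compare sequence such as
   "cmp x, #0" followed by "cmpeq y, #0".  */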
8337 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
8339 enum rtx_code cond1, cond2;
8342 /* Currently we will probably get the wrong result if the individual
8343 comparisons are not simple. This also ensures that it is safe to
8344 reverse a comparison if necessary. */
8345 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
8347 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
8351 /* The if_then_else variant of this tests the second condition if the
8352 first passes, but is true if the first fails. Reverse the first
8353 condition to get a true "inclusive-or" expression. */
8354 if (cond_or == DOM_CC_NX_OR_Y)
8355 cond1 = reverse_condition (cond1);
8357 /* If the comparisons are not equal, and one doesn't dominate the other,
8358 then we can't do this. */
8360 && !comparison_dominates_p (cond1, cond2)
8361 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
8366 enum rtx_code temp = cond1;
8374 if (cond_or == DOM_CC_X_AND_Y)
8379 case EQ: return CC_DEQmode;
8380 case LE: return CC_DLEmode;
8381 case LEU: return CC_DLEUmode;
8382 case GE: return CC_DGEmode;
8383 case GEU: return CC_DGEUmode;
8384 default: gcc_unreachable ();
8388 if (cond_or == DOM_CC_X_AND_Y)
8404 if (cond_or == DOM_CC_X_AND_Y)
8420 if (cond_or == DOM_CC_X_AND_Y)
8436 if (cond_or == DOM_CC_X_AND_Y)
8451 /* The remaining cases only occur when both comparisons are the same. */
8454 gcc_assert (cond1 == cond2);
8458 gcc_assert (cond1 == cond2);
8462 gcc_assert (cond1 == cond2);
8466 gcc_assert (cond1 == cond2);
8470 gcc_assert (cond1 == cond2);
8479 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
8481 /* All floating point compares return CCFP if it is an equality
8482 comparison, and CCFPE otherwise. */
8483 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
8503 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
8512 /* A compare with a shifted operand. Because of canonicalization, the
8513 comparison will have to be swapped when we emit the assembler. */
8514 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
8515 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8516 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
8517 || GET_CODE (x) == ROTATERT))
8520 /* This operation is performed swapped, but since we only rely on the Z
8521 flag we don't need an additional mode. */
8522 if (GET_MODE (y) == SImode && REG_P (y)
8523 && GET_CODE (x) == NEG
8524 && (op == EQ || op == NE))
8527 /* This is a special case that is used by combine to allow a
8528 comparison of a shifted byte load to be split into a zero-extend
8529 followed by a comparison of the shifted integer (only valid for
8530 equalities and unsigned inequalities). */
8531 if (GET_MODE (x) == SImode
8532 && GET_CODE (x) == ASHIFT
8533 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
8534 && GET_CODE (XEXP (x, 0)) == SUBREG
8535 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
8536 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
8537 && (op == EQ || op == NE
8538 || op == GEU || op == GTU || op == LTU || op == LEU)
8539 && GET_CODE (y) == CONST_INT)
8542 /* A construct for a conditional compare, if the false arm contains
8543 0, then both conditions must be true, otherwise either condition
8544 must be true. Not all conditions are possible, so CCmode is
8545 returned if it can't be done. */
8546 if (GET_CODE (x) == IF_THEN_ELSE
8547 && (XEXP (x, 2) == const0_rtx
8548 || XEXP (x, 2) == const1_rtx)
8549 && COMPARISON_P (XEXP (x, 0))
8550 && COMPARISON_P (XEXP (x, 1)))
8551 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8552 INTVAL (XEXP (x, 2)));
8554 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
8555 if (GET_CODE (x) == AND
8556 && COMPARISON_P (XEXP (x, 0))
8557 && COMPARISON_P (XEXP (x, 1)))
8558 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8561 if (GET_CODE (x) == IOR
8562 && COMPARISON_P (XEXP (x, 0))
8563 && COMPARISON_P (XEXP (x, 1)))
8564 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8567 /* An operation (on Thumb) where we want to test for a single bit.
8568 This is done by shifting that bit up into the top bit of a
8569 scratch register; we can then branch on the sign bit. */
8571 && GET_MODE (x) == SImode
8572 && (op == EQ || op == NE)
8573 && GET_CODE (x) == ZERO_EXTRACT
8574 && XEXP (x, 1) == const1_rtx)
8577 /* For an operation that sets the condition codes as a side-effect, the
8578 V flag is not set correctly, so we can only use comparisons where
8579 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
8581 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
8582 if (GET_MODE (x) == SImode
8584 && (op == EQ || op == NE || op == LT || op == GE)
8585 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
8586 || GET_CODE (x) == AND || GET_CODE (x) == IOR
8587 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
8588 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
8589 || GET_CODE (x) == LSHIFTRT
8590 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8591 || GET_CODE (x) == ROTATERT
8592 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
8595 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
8598 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
8599 && GET_CODE (x) == PLUS
8600 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
8606 /* X and Y are two things to compare using CODE. Emit the compare insn and
8607 return the rtx for register 0 in the proper mode. FP means this is a
8608 floating point compare: I don't think that it is needed on the ARM. */
8610 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
8612 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
8613 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
8615 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
8620 /* Generate a sequence of insns that will generate the correct return
8621 address mask depending on the physical architecture that the program is running on. */
8624 arm_gen_return_addr_mask (void)
8626 rtx reg = gen_reg_rtx (Pmode);
8628 emit_insn (gen_return_addr_mask (reg));
8633 arm_reload_in_hi (rtx *operands)
8635 rtx ref = operands[1];
8637 HOST_WIDE_INT offset = 0;
8639 if (GET_CODE (ref) == SUBREG)
8641 offset = SUBREG_BYTE (ref);
8642 ref = SUBREG_REG (ref);
8645 if (GET_CODE (ref) == REG)
8647 /* We have a pseudo which has been spilt onto the stack; there
8648 are two cases here: the first where there is a simple
8649 stack-slot replacement and a second where the stack-slot is
8650 out of range, or is used as a subreg. */
8651 if (reg_equiv_mem[REGNO (ref)])
8653 ref = reg_equiv_mem[REGNO (ref)];
8654 base = find_replacement (&XEXP (ref, 0));
8657 /* The slot is out of range, or was dressed up in a SUBREG. */
8658 base = reg_equiv_address[REGNO (ref)];
8661 base = find_replacement (&XEXP (ref, 0));
8663 /* Handle the case where the address is too complex to be offset by 1. */
8664 if (GET_CODE (base) == MINUS
8665 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8667 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8669 emit_set_insn (base_plus, base);
8672 else if (GET_CODE (base) == PLUS)
8674 /* The addend must be CONST_INT, or we would have dealt with it above. */
8675 HOST_WIDE_INT hi, lo;
8677 offset += INTVAL (XEXP (base, 1));
8678 base = XEXP (base, 0);
8680 /* Rework the address into a legal sequence of insns. */
8681 /* Valid range for lo is -4095 -> 4095 */
8684 : -((-offset) & 0xfff));
8686 /* Corner case, if lo is the max offset then we would be out of range
8687 once we have added the additional 1 below, so bump the msb into the
8688 pre-loading insn(s). */
8692 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8693 ^ (HOST_WIDE_INT) 0x80000000)
8694 - (HOST_WIDE_INT) 0x80000000);
8696 gcc_assert (hi + lo == offset);
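      /* Worked example (illustrative): for offset = 4100 we get lo = 4 and
         hi = 4096, so BASE_PLUS below holds base + 4096 and the two byte
         loads use offsets 4 and 5.  For offset = 4095 the corner case above
         reduces lo to 2047 and hi becomes 2048, keeping lo + 1 in range.  */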
8700 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8702 /* Get the base address; addsi3 knows how to handle constants
8703 that require more than one insn. */
8704 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8710 /* Operands[2] may overlap operands[0] (though it won't overlap
8711 operands[1]), that's why we asked for a DImode reg -- so we can
8712 use the bit that does not overlap. */
8713 if (REGNO (operands[2]) == REGNO (operands[0]))
8714 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8716 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8718 emit_insn (gen_zero_extendqisi2 (scratch,
8719 gen_rtx_MEM (QImode,
8720 plus_constant (base,
8722 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
8723 gen_rtx_MEM (QImode,
8724 plus_constant (base,
8726 if (!BYTES_BIG_ENDIAN)
8727 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8728 gen_rtx_IOR (SImode,
8731 gen_rtx_SUBREG (SImode, operands[0], 0),
8735 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8736 gen_rtx_IOR (SImode,
8737 gen_rtx_ASHIFT (SImode, scratch,
8739 gen_rtx_SUBREG (SImode, operands[0], 0)));
8742 /* Handle storing a half-word to memory during reload by synthesizing it as two
8743 byte stores. Take care not to clobber the input values until after we
8744 have moved them somewhere safe. This code assumes that if the DImode
8745 scratch in operands[2] overlaps either the input value or output address
8746 in some way, then that value must die in this insn (we absolutely need
8747 two scratch registers for some corner cases). */
8749 arm_reload_out_hi (rtx *operands)
8751 rtx ref = operands[0];
8752 rtx outval = operands[1];
8754 HOST_WIDE_INT offset = 0;
8756 if (GET_CODE (ref) == SUBREG)
8758 offset = SUBREG_BYTE (ref);
8759 ref = SUBREG_REG (ref);
8762 if (GET_CODE (ref) == REG)
8764 /* We have a pseudo which has been spilt onto the stack; there
8765 are two cases here: the first where there is a simple
8766 stack-slot replacement and a second where the stack-slot is
8767 out of range, or is used as a subreg. */
8768 if (reg_equiv_mem[REGNO (ref)])
8770 ref = reg_equiv_mem[REGNO (ref)];
8771 base = find_replacement (&XEXP (ref, 0));
8774 /* The slot is out of range, or was dressed up in a SUBREG. */
8775 base = reg_equiv_address[REGNO (ref)];
8778 base = find_replacement (&XEXP (ref, 0));
8780 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8782 /* Handle the case where the address is too complex to be offset by 1. */
8783 if (GET_CODE (base) == MINUS
8784 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8786 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8788 /* Be careful not to destroy OUTVAL. */
8789 if (reg_overlap_mentioned_p (base_plus, outval))
8791 /* Updating base_plus might destroy outval; see if we can
8792 swap the scratch and base_plus. */
8793 if (!reg_overlap_mentioned_p (scratch, outval))
8796 scratch = base_plus;
8801 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8803 /* Be conservative and copy OUTVAL into the scratch now;
8804 this should only be necessary if outval is a subreg
8805 of something larger than a word. */
8806 /* XXX Might this clobber base? I can't see how it can,
8807 since scratch is known to overlap with OUTVAL, and
8808 must be wider than a word. */
8809 emit_insn (gen_movhi (scratch_hi, outval));
8810 outval = scratch_hi;
8814 emit_set_insn (base_plus, base);
8817 else if (GET_CODE (base) == PLUS)
8819 /* The addend must be CONST_INT, or we would have dealt with it above. */
8820 HOST_WIDE_INT hi, lo;
8822 offset += INTVAL (XEXP (base, 1));
8823 base = XEXP (base, 0);
8825 /* Rework the address into a legal sequence of insns. */
8826 /* Valid range for lo is -4095 -> 4095 */
8829 : -((-offset) & 0xfff));
8831 /* Corner case, if lo is the max offset then we would be out of range
8832 once we have added the additional 1 below, so bump the msb into the
8833 pre-loading insn(s). */
8837 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8838 ^ (HOST_WIDE_INT) 0x80000000)
8839 - (HOST_WIDE_INT) 0x80000000);
8841 gcc_assert (hi + lo == offset);
8845 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8847 /* Be careful not to destroy OUTVAL. */
8848 if (reg_overlap_mentioned_p (base_plus, outval))
8850 /* Updating base_plus might destroy outval; see if we
8851 can swap the scratch and base_plus. */
8852 if (!reg_overlap_mentioned_p (scratch, outval))
8855 scratch = base_plus;
8860 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8862 /* Be conservative and copy outval into scratch now;
8863 this should only be necessary if outval is a
8864 subreg of something larger than a word. */
8865 /* XXX Might this clobber base? I can't see how it
8866 can, since scratch is known to overlap with
8868 emit_insn (gen_movhi (scratch_hi, outval));
8869 outval = scratch_hi;
8873 /* Get the base address; addsi3 knows how to handle constants
8874 that require more than one insn. */
8875 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8881 if (BYTES_BIG_ENDIAN)
8883 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8884 plus_constant (base, offset + 1)),
8885 gen_lowpart (QImode, outval)));
8886 emit_insn (gen_lshrsi3 (scratch,
8887 gen_rtx_SUBREG (SImode, outval, 0),
8889 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8890 gen_lowpart (QImode, scratch)));
8894 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8895 gen_lowpart (QImode, outval)));
8896 emit_insn (gen_lshrsi3 (scratch,
8897 gen_rtx_SUBREG (SImode, outval, 0),
8899 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8900 plus_constant (base, offset + 1)),
8901 gen_lowpart (QImode, scratch)));
8905 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8906 (padded to the size of a word) should be passed in a register. */
8909 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8911 if (TARGET_AAPCS_BASED)
8912 return must_pass_in_stack_var_size (mode, type);
8914 return must_pass_in_stack_var_size_or_pad (mode, type);
8918 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8919 Return true if an argument passed on the stack should be padded upwards,
8920 i.e. if the least-significant byte has useful data.
8921 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8922 aggregate types are placed in the lowest memory address. */
8925 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8927 if (!TARGET_AAPCS_BASED)
8928 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8930 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8937 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8938 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8939 byte of the register has useful data, and return the opposite if the
8940 most significant byte does.
8941 For AAPCS, small aggregates and small complex types are always padded upwards. */
8945 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8946 tree type, int first ATTRIBUTE_UNUSED)
8948 if (TARGET_AAPCS_BASED
8950 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8951 && int_size_in_bytes (type) <= 4)
8954 /* Otherwise, use default padding. */
8955 return !BYTES_BIG_ENDIAN;
8959 /* Print a symbolic form of X to the debug file, F. */
8961 arm_print_value (FILE *f, rtx x)
8963 switch (GET_CODE (x))
8966 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8970 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8978 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8980 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8981 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8989 fprintf (f, "\"%s\"", XSTR (x, 0));
8993 fprintf (f, "`%s'", XSTR (x, 0));
8997 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
9001 arm_print_value (f, XEXP (x, 0));
9005 arm_print_value (f, XEXP (x, 0));
9007 arm_print_value (f, XEXP (x, 1));
9015 fprintf (f, "????");
9020 /* Routines for manipulation of the constant pool. */
9022 /* Arm instructions cannot load a large constant directly into a
9023 register; they have to come from a pc relative load. The constant
9024 must therefore be placed in the addressable range of the pc
9025 relative load. Depending on the precise pc relative load
9026 instruction the range is somewhere between 256 bytes and 4k. This
9027 means that we often have to dump a constant inside a function, and
9028 generate code to branch around it.
9030 It is important to minimize this, since the branches will slow
9031 things down and make the code larger.
9033 Normally we can hide the table after an existing unconditional
9034 branch so that there is no interruption of the flow, but in the
9035 worst case the code looks like this:
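	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...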
9053 We fix this by performing a scan after scheduling, which notices
9054 which instructions need to have their operands fetched from the
9055 constant table and builds the table.
9057 The algorithm starts by building a table of all the constants that
9058 need fixing up and all the natural barriers in the function (places
9059 where a constant table can be dropped without breaking the flow).
9060 For each fixup we note how far the pc-relative replacement will be
9061 able to reach and the offset of the instruction into the function.
9063 Having built the table we then group the fixes together to form
9064 tables that are as large as possible (subject to addressing
9065 constraints) and emit each table of constants after the last
9066 barrier that is within range of all the instructions in the group.
9067 If a group does not contain a barrier, then we forcibly create one
9068 by inserting a jump instruction into the flow. Once the table has
9069 been inserted, the insns are then modified to reference the
9070 relevant entry in the pool.
9072 Possible enhancements to the algorithm (not implemented) are:
9074 1) For some processors and object formats, there may be benefit in
9075 aligning the pools to the start of cache lines; this alignment
9076 would need to be taken into account when calculating addressability of a pool. */
9079 /* These typedefs are located at the start of this file, so that
9080 they can be used in the prototypes there. This comment is to
9081 remind readers of that fact so that the following structures
9082 can be understood more easily.
9084 typedef struct minipool_node Mnode;
9085 typedef struct minipool_fixup Mfix; */
9087 struct minipool_node
9089 /* Doubly linked chain of entries. */
9092 /* The maximum offset into the code that this entry can be placed. While
9093 pushing fixes for forward references, all entries are sorted in order
9094 of increasing max_address. */
9095 HOST_WIDE_INT max_address;
9096 /* Similarly for an entry inserted for a backwards ref. */
9097 HOST_WIDE_INT min_address;
9098 /* The number of fixes referencing this entry. This can become zero
9099 if we "unpush" an entry. In this case we ignore the entry when we
9100 come to emit the code. */
9102 /* The offset from the start of the minipool. */
9103 HOST_WIDE_INT offset;
9104 /* The value in the table. */
9106 /* The mode of the value. */
9107 enum machine_mode mode;
9108 /* The size of the value. With iWMMXt enabled
9109 sizes > 4 also imply an alignment of 8 bytes. */
9113 struct minipool_fixup
9117 HOST_WIDE_INT address;
9119 enum machine_mode mode;
9123 HOST_WIDE_INT forwards;
9124 HOST_WIDE_INT backwards;
9127 /* Fixes less than a word need padding out to a word boundary. */
9128 #define MINIPOOL_FIX_SIZE(mode) \
9129 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
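/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (a 2-byte fix is padded
   out to a full word), while MINIPOOL_FIX_SIZE (DImode) is simply 8.  */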
9131 static Mnode * minipool_vector_head;
9132 static Mnode * minipool_vector_tail;
9133 static rtx minipool_vector_label;
9134 static int minipool_pad;
9136 /* The linked list of all minipool fixes required for this function. */
9137 Mfix * minipool_fix_head;
9138 Mfix * minipool_fix_tail;
9139 /* The fix entry for the current minipool, once it has been placed. */
9140 Mfix * minipool_barrier;
9142 /* Determines if INSN is the start of a jump table. Returns the end
9143 of the TABLE or NULL_RTX. */
9145 is_jump_table (rtx insn)
9149 if (GET_CODE (insn) == JUMP_INSN
9150 && JUMP_LABEL (insn) != NULL
9151 && ((table = next_real_insn (JUMP_LABEL (insn)))
9152 == next_real_insn (insn))
9154 && GET_CODE (table) == JUMP_INSN
9155 && (GET_CODE (PATTERN (table)) == ADDR_VEC
9156 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
9162 #ifndef JUMP_TABLES_IN_TEXT_SECTION
9163 #define JUMP_TABLES_IN_TEXT_SECTION 0
9166 static HOST_WIDE_INT
9167 get_jump_table_size (rtx insn)
9169 /* ADDR_VECs only take room if read-only data goes into the text section. */
9171 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
9173 rtx body = PATTERN (insn);
9174 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
9176 HOST_WIDE_INT modesize;
9178 modesize = GET_MODE_SIZE (GET_MODE (body));
9179 size = modesize * XVECLEN (body, elt);
9183 /* Round up size of TBB table to a halfword boundary. */
9184 size = (size + 1) & ~(HOST_WIDE_INT)1;
9187 /* No padding necessary for TBH. */
9190 /* Add two bytes for alignment on Thumb. */
9203 /* Move a minipool fix MP from its current location to before MAX_MP.
9204 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
9205 constraints may need updating. */
9207 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
9208 HOST_WIDE_INT max_address)
9210 /* The code below assumes these are different. */
9211 gcc_assert (mp != max_mp);
9215 if (max_address < mp->max_address)
9216 mp->max_address = max_address;
9220 if (max_address > max_mp->max_address - mp->fix_size)
9221 mp->max_address = max_mp->max_address - mp->fix_size;
9223 mp->max_address = max_address;
9225 /* Unlink MP from its current position. Since max_mp is non-null,
9226 mp->prev must be non-null. */
9227 mp->prev->next = mp->next;
9228 if (mp->next != NULL)
9229 mp->next->prev = mp->prev;
9231 minipool_vector_tail = mp->prev;
9233 /* Re-insert it before MAX_MP. */
9235 mp->prev = max_mp->prev;
9238 if (mp->prev != NULL)
9239 mp->prev->next = mp;
9241 minipool_vector_head = mp;
9244 /* Save the new entry. */
9247 /* Scan over the preceding entries and adjust their addresses as required. */
9249 while (mp->prev != NULL
9250 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9252 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9259 /* Add a constant to the minipool for a forward reference. Returns the
9260 node added or NULL if the constant will not fit in this pool. */
9262 add_minipool_forward_ref (Mfix *fix)
9264 /* If set, max_mp is the first pool_entry that has a lower
9265 constraint than the one we are trying to add. */
9266 Mnode * max_mp = NULL;
9267 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
9270 /* If the minipool starts before the end of FIX->INSN then this FIX
9271 cannot be placed into the current pool. Furthermore, adding the
9272 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
9274 if (minipool_vector_head &&
9275 (fix->address + get_attr_length (fix->insn)
9276 >= minipool_vector_head->max_address - fix->fix_size))
9279 /* Scan the pool to see if a constant with the same value has
9280 already been added. While we are doing this, also note the
9281 location where we must insert the constant if it doesn't already exist. */
9283 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9285 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9286 && fix->mode == mp->mode
9287 && (GET_CODE (fix->value) != CODE_LABEL
9288 || (CODE_LABEL_NUMBER (fix->value)
9289 == CODE_LABEL_NUMBER (mp->value)))
9290 && rtx_equal_p (fix->value, mp->value))
9292 /* More than one fix references this entry. */
9294 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
9297 /* Note the insertion point if necessary. */
9299 && mp->max_address > max_address)
9302 /* If we are inserting an 8-byte aligned quantity and
9303 we have not already found an insertion point, then
9304 make sure that all such 8-byte aligned quantities are
9305 placed at the start of the pool. */
9306 if (ARM_DOUBLEWORD_ALIGN
9308 && fix->fix_size >= 8
9309 && mp->fix_size < 8)
9312 max_address = mp->max_address;
9316 /* The value is not currently in the minipool, so we need to create
9317 a new entry for it. If MAX_MP is NULL, the entry will be put on
9318 the end of the list since the placement is less constrained than
9319 any existing entry. Otherwise, we insert the new fix before
9320 MAX_MP and, if necessary, adjust the constraints on the other entries. */
9323 mp->fix_size = fix->fix_size;
9324 mp->mode = fix->mode;
9325 mp->value = fix->value;
9327 /* Not yet required for a backwards ref. */
9328 mp->min_address = -65536;
9332 mp->max_address = max_address;
9334 mp->prev = minipool_vector_tail;
9336 if (mp->prev == NULL)
9338 minipool_vector_head = mp;
9339 minipool_vector_label = gen_label_rtx ();
9342 mp->prev->next = mp;
9344 minipool_vector_tail = mp;
9348 if (max_address > max_mp->max_address - mp->fix_size)
9349 mp->max_address = max_mp->max_address - mp->fix_size;
9351 mp->max_address = max_address;
9354 mp->prev = max_mp->prev;
9356 if (mp->prev != NULL)
9357 mp->prev->next = mp;
9359 minipool_vector_head = mp;
9362 /* Save the new entry. */
9365 /* Scan over the preceding entries and adjust their addresses as required. */
9367 while (mp->prev != NULL
9368 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9370 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9378 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
9379 HOST_WIDE_INT min_address)
9381 HOST_WIDE_INT offset;
9383 /* The code below assumes these are different. */
9384 gcc_assert (mp != min_mp);
9388 if (min_address > mp->min_address)
9389 mp->min_address = min_address;
9393 /* We will adjust this below if it is too loose. */
9394 mp->min_address = min_address;
9396 /* Unlink MP from its current position. Since min_mp is non-null,
9397 mp->next must be non-null. */
9398 mp->next->prev = mp->prev;
9399 if (mp->prev != NULL)
9400 mp->prev->next = mp->next;
9402 minipool_vector_head = mp->next;
9404 /* Reinsert it after MIN_MP. */
9406 mp->next = min_mp->next;
9408 if (mp->next != NULL)
9409 mp->next->prev = mp;
9411 minipool_vector_tail = mp;
9417 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9419 mp->offset = offset;
9420 if (mp->refcount > 0)
9421 offset += mp->fix_size;
9423 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
9424 mp->next->min_address = mp->min_address + mp->fix_size;
9430 /* Add a constant to the minipool for a backward reference. Returns the
9431 node added or NULL if the constant will not fit in this pool.
9433 Note that the code for insertion for a backwards reference can be
9434 somewhat confusing because the calculated offsets for each fix do
9435 not take into account the size of the pool (which is still under construction). */
9438 add_minipool_backward_ref (Mfix *fix)
9440 /* If set, min_mp is the last pool_entry that has a lower constraint
9441 than the one we are trying to add. */
9442 Mnode *min_mp = NULL;
9443 /* This can be negative, since it is only a constraint. */
9444 HOST_WIDE_INT min_address = fix->address - fix->backwards;
9447 /* If we can't reach the current pool from this insn, or if we can't
9448 insert this entry at the end of the pool without pushing other
9449 fixes out of range, then we don't try. This ensures that we
9450 can't fail later on. */
9451 if (min_address >= minipool_barrier->address
9452 || (minipool_vector_tail->min_address + fix->fix_size
9453 >= minipool_barrier->address))
9456 /* Scan the pool to see if a constant with the same value has
9457 already been added. While we are doing this, also note the
9458 location where we must insert the constant if it doesn't already exist. */
9460 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
9462 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9463 && fix->mode == mp->mode
9464 && (GET_CODE (fix->value) != CODE_LABEL
9465 || (CODE_LABEL_NUMBER (fix->value)
9466 == CODE_LABEL_NUMBER (mp->value)))
9467 && rtx_equal_p (fix->value, mp->value)
9468 /* Check that there is enough slack to move this entry to the
9469 end of the table (this is conservative). */
9471 > (minipool_barrier->address
9472 + minipool_vector_tail->offset
9473 + minipool_vector_tail->fix_size)))
9476 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
9480 mp->min_address += fix->fix_size;
9483 /* Note the insertion point if necessary. */
9484 if (mp->min_address < min_address)
9486 /* For now, we do not allow the insertion of 8-byte alignment
9487 requiring nodes anywhere but at the start of the pool. */
9488 if (ARM_DOUBLEWORD_ALIGN
9489 && fix->fix_size >= 8 && mp->fix_size < 8)
9494 else if (mp->max_address
9495 < minipool_barrier->address + mp->offset + fix->fix_size)
9497 /* Inserting before this entry would push the fix beyond
9498 its maximum address (which can happen if we have
9499 re-located a forwards fix); force the new fix to come after it. */
9501 if (ARM_DOUBLEWORD_ALIGN
9502 && fix->fix_size >= 8 && mp->fix_size < 8)
9507 min_address = mp->min_address + fix->fix_size;
9510 /* Do not insert a non-8-byte aligned quantity before 8-byte
9511 aligned quantities. */
9512 else if (ARM_DOUBLEWORD_ALIGN
9513 && fix->fix_size < 8
9514 && mp->fix_size >= 8)
9517 min_address = mp->min_address + fix->fix_size;
9522 /* We need to create a new entry. */
9524 mp->fix_size = fix->fix_size;
9525 mp->mode = fix->mode;
9526 mp->value = fix->value;
9528 mp->max_address = minipool_barrier->address + 65536;
9530 mp->min_address = min_address;
9535 mp->next = minipool_vector_head;
9537 if (mp->next == NULL)
9539 minipool_vector_tail = mp;
9540 minipool_vector_label = gen_label_rtx ();
9543 mp->next->prev = mp;
9545 minipool_vector_head = mp;
9549 mp->next = min_mp->next;
9553 if (mp->next != NULL)
9554 mp->next->prev = mp;
9556 minipool_vector_tail = mp;
9559 /* Save the new entry. */
9567 /* Scan over the following entries and adjust their offsets. */
9568 while (mp->next != NULL)
9570 if (mp->next->min_address < mp->min_address + mp->fix_size)
9571 mp->next->min_address = mp->min_address + mp->fix_size;
9574 mp->next->offset = mp->offset + mp->fix_size;
9576 mp->next->offset = mp->offset;
9585 assign_minipool_offsets (Mfix *barrier)
9587 HOST_WIDE_INT offset = 0;
9590 minipool_barrier = barrier;
9592 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9594 mp->offset = offset;
9596 if (mp->refcount > 0)
9597 offset += mp->fix_size;
9601 /* Output the literal table. */
9603 dump_minipool (rtx scan)
9609 if (ARM_DOUBLEWORD_ALIGN)
9610 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9611 if (mp->refcount > 0 && mp->fix_size >= 8)
9619 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
9620 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
9622 scan = emit_label_after (gen_label_rtx (), scan);
9623 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
9624 scan = emit_label_after (minipool_vector_label, scan);
9626 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
9628 if (mp->refcount > 0)
9633 ";; Offset %u, min %ld, max %ld ",
9634 (unsigned) mp->offset, (unsigned long) mp->min_address,
9635 (unsigned long) mp->max_address);
9636 arm_print_value (dump_file, mp->value);
9637 fputc ('\n', dump_file);
9640 switch (mp->fix_size)
9642 #ifdef HAVE_consttable_1
9644 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
9648 #ifdef HAVE_consttable_2
9650 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
9654 #ifdef HAVE_consttable_4
9656 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
9660 #ifdef HAVE_consttable_8
9662 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
9666 #ifdef HAVE_consttable_16
9668 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
9681 minipool_vector_head = minipool_vector_tail = NULL;
9682 scan = emit_insn_after (gen_consttable_end (), scan);
9683 scan = emit_barrier_after (scan);
9686 /* Return the cost of forcibly inserting a barrier after INSN. */
9688 arm_barrier_cost (rtx insn)
9690 /* Basing the location of the pool on the loop depth is preferable,
9691 but at the moment, the basic block information seems to be
9692 corrupt by this stage of the compilation. */
9694 rtx next = next_nonnote_insn (insn);
9696 if (next != NULL && GET_CODE (next) == CODE_LABEL)
9699 switch (GET_CODE (insn))
9702 /* It will always be better to place the table before the label, rather
9711 return base_cost - 10;
9714 return base_cost + 10;
9718 /* Find the best place in the insn stream in the range
9719 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
9720 Create the barrier by inserting a jump and add a new fix entry for it. */
9723 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
9725 HOST_WIDE_INT count = 0;
9727 rtx from = fix->insn;
9728 /* The instruction after which we will insert the jump. */
9729 rtx selected = NULL;
9731 /* The address at which the jump instruction will be placed. */
9732 HOST_WIDE_INT selected_address;
9734 HOST_WIDE_INT max_count = max_address - fix->address;
9735 rtx label = gen_label_rtx ();
9737 selected_cost = arm_barrier_cost (from);
9738 selected_address = fix->address;
9740 while (from && count < max_count)
9745 /* This code shouldn't have been called if there was a natural barrier within range. */
9747 gcc_assert (GET_CODE (from) != BARRIER);
9749 /* Count the length of this insn. */
9750 count += get_attr_length (from);
9752 /* If there is a jump table, add its length. */
9753 tmp = is_jump_table (from);
9756 count += get_jump_table_size (tmp);
9758 /* Jump tables aren't in a basic block, so base the cost on
9759 the dispatch insn. If we select this location, we will
9760 still put the pool after the table. */
9761 new_cost = arm_barrier_cost (from);
9763 if (count < max_count
9764 && (!selected || new_cost <= selected_cost))
9767 selected_cost = new_cost;
9768 selected_address = fix->address + count;
9771 /* Continue after the dispatch table. */
9772 from = NEXT_INSN (tmp);
9776 new_cost = arm_barrier_cost (from);
9778 if (count < max_count
9779 && (!selected || new_cost <= selected_cost))
9782 selected_cost = new_cost;
9783 selected_address = fix->address + count;
9786 from = NEXT_INSN (from);
9789 /* Make sure that we found a place to insert the jump. */
9790 gcc_assert (selected);
9792 /* Create a new JUMP_INSN that branches around a barrier. */
9793 from = emit_jump_insn_after (gen_jump (label), selected);
9794 JUMP_LABEL (from) = label;
9795 barrier = emit_barrier_after (from);
9796 emit_label_after (label, barrier);
9798 /* Create a minipool barrier entry for the new barrier. */
9799 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9800 new_fix->insn = barrier;
9801 new_fix->address = selected_address;
9802 new_fix->next = fix->next;
9803 fix->next = new_fix;
9808 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
9811 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9813 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9816 fix->address = address;
9819 if (minipool_fix_head != NULL)
9820 minipool_fix_tail->next = fix;
9822 minipool_fix_head = fix;
9824 minipool_fix_tail = fix;
9827 /* Record INSN, which will need fixing up to load a value from the
9828 minipool. ADDRESS is the offset of the insn since the start of the
9829 function; LOC is a pointer to the part of the insn which requires
9830 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
9833 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9834 enum machine_mode mode, rtx value)
9836 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9839 fix->address = address;
9842 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9844 fix->forwards = get_attr_pool_range (insn);
9845 fix->backwards = get_attr_neg_pool_range (insn);
9846 fix->minipool = NULL;
9848 /* If an insn doesn't have a range defined for it, then it isn't
9849 expecting to be reworked by this code. Better to stop now than
9850 to generate duff assembly code. */
9851 gcc_assert (fix->forwards || fix->backwards);
9853 /* If an entry requires 8-byte alignment then assume all constant pools
9854 require 4 bytes of padding. Trying to do this later on a per-pool
9855 basis is awkward because existing pool entries have to be modified. */
9856 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9862 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9863 GET_MODE_NAME (mode),
9864 INSN_UID (insn), (unsigned long) address,
9865 -1 * (long)fix->backwards, (long)fix->forwards);
9866 arm_print_value (dump_file, fix->value);
9867 fprintf (dump_file, "\n");
9870 /* Add it to the chain of fixes. */
9873 if (minipool_fix_head != NULL)
9874 minipool_fix_tail->next = fix;
9876 minipool_fix_head = fix;
9878 minipool_fix_tail = fix;
9881 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9882 Returns the number of insns needed, or 99 if we don't know how to generate it. */
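/* An illustrative example: for the DImode constant 0x0000000100000001 each
   SImode half is 1, which arm_gen_constant can synthesize with a single
   mov, so the cost returned would be 2.  */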
9885 arm_const_double_inline_cost (rtx val)
9887 rtx lowpart, highpart;
9888 enum machine_mode mode;
9890 mode = GET_MODE (val);
9892 if (mode == VOIDmode)
9895 gcc_assert (GET_MODE_SIZE (mode) == 8);
9897 lowpart = gen_lowpart (SImode, val);
9898 highpart = gen_highpart_mode (SImode, mode, val);
9900 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9901 gcc_assert (GET_CODE (highpart) == CONST_INT);
9903 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9904 NULL_RTX, NULL_RTX, 0, 0)
9905 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9906 NULL_RTX, NULL_RTX, 0, 0));
9909 /* Return true if it is worthwhile to split a 64-bit constant into two
9910 32-bit operations. This is the case if optimizing for size, or
9911 if we have load delay slots, or if one 32-bit part can be done with
9912 a single data operation. */
9914 arm_const_double_by_parts (rtx val)
9916 enum machine_mode mode = GET_MODE (val);
9919 if (optimize_size || arm_ld_sched)
9922 if (mode == VOIDmode)
9925 part = gen_highpart_mode (SImode, mode, val);
9927 gcc_assert (GET_CODE (part) == CONST_INT);
9929 if (const_ok_for_arm (INTVAL (part))
9930 || const_ok_for_arm (~INTVAL (part)))
9933 part = gen_lowpart (SImode, val);
9935 gcc_assert (GET_CODE (part) == CONST_INT);
9937 if (const_ok_for_arm (INTVAL (part))
9938 || const_ok_for_arm (~INTVAL (part)))
9944 /* Scan INSN and note any of its operands that need fixing.
9945 If DO_PUSHES is false we do not actually push any of the fixups
9946 needed. The function returns TRUE if any fixups were needed/pushed.
9947 This is used by arm_memory_load_p() which needs to know about loads
9948 of constants that will be converted into minipool loads. */
9950 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9952 bool result = false;
9955 extract_insn (insn);
9957 if (!constrain_operands (1))
9958 fatal_insn_not_found (insn);
9960 if (recog_data.n_alternatives == 0)
9963 /* Fill in recog_op_alt with information about the constraints of this insn. */
9965 preprocess_constraints ();
9967 for (opno = 0; opno < recog_data.n_operands; opno++)
9969 /* Things we need to fix can only occur in inputs. */
9970 if (recog_data.operand_type[opno] != OP_IN)
9973 /* If this alternative is a memory reference, then any mention
9974 of constants in this alternative is really to fool reload
9975 into allowing us to accept one there. We need to fix them up
9976 now so that we output the right code. */
9977 if (recog_op_alt[opno][which_alternative].memory_ok)
9979 rtx op = recog_data.operand[opno];
9981 if (CONSTANT_P (op))
9984 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9985 recog_data.operand_mode[opno], op);
9988 else if (GET_CODE (op) == MEM
9989 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9990 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9994 rtx cop = avoid_constant_pool_reference (op);
9996 /* Casting the address of something to a mode narrower
9997 than a word can cause avoid_constant_pool_reference()
9998 to return the pool reference itself. That's no good to
9999 us here. Let's just hope that we can use the
10000 constant pool value directly. */
10002 cop = get_pool_constant (XEXP (op, 0));
10004 push_minipool_fix (insn, address,
10005 recog_data.operand_loc[opno],
10006 recog_data.operand_mode[opno], cop);
10017 /* GCC puts the pool in the wrong place for ARM, since we can only
10018 load addresses a limited distance around the pc. We do some
10019 special munging to move the constant pool values to the correct
10020 point in the code. */
10025 HOST_WIDE_INT address = 0;
10028 minipool_fix_head = minipool_fix_tail = NULL;
10030 /* The first insn must always be a note, or the code below won't
10031 scan it properly. */
10032 insn = get_insns ();
10033 gcc_assert (GET_CODE (insn) == NOTE);
10036 /* Scan all the insns and record the operands that will need fixing. */
10037 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
10039 if (TARGET_CIRRUS_FIX_INVALID_INSNS
10040 && (arm_cirrus_insn_p (insn)
10041 || GET_CODE (insn) == JUMP_INSN
10042 || arm_memory_load_p (insn)))
10043 cirrus_reorg (insn);
10045 if (GET_CODE (insn) == BARRIER)
10046 push_minipool_barrier (insn, address);
10047 else if (INSN_P (insn))
10051 note_invalid_constants (insn, address, true);
10052 address += get_attr_length (insn);
10054 /* If the insn is a vector jump, add the size of the table
10055 and skip the table. */
10056 if ((table = is_jump_table (insn)) != NULL)
10058 address += get_jump_table_size (table);
10064 fix = minipool_fix_head;
10066 /* Now scan the fixups and perform the required changes. */
10071 Mfix * last_added_fix;
10072 Mfix * last_barrier = NULL;
10075 /* Skip any further barriers before the next fix. */
10076 while (fix && GET_CODE (fix->insn) == BARRIER)
10079 /* No more fixes. */
10083 last_added_fix = NULL;
10085 for (ftmp = fix; ftmp; ftmp = ftmp->next)
10087 if (GET_CODE (ftmp->insn) == BARRIER)
10089 if (ftmp->address >= minipool_vector_head->max_address)
10092 last_barrier = ftmp;
10094 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
10097 last_added_fix = ftmp; /* Keep track of the last fix added. */
10100 /* If we found a barrier, drop back to that; any fixes that we
10101 could have reached but come after the barrier will now go in
10102 the next mini-pool. */
10103 if (last_barrier != NULL)
10105 /* Reduce the refcount for those fixes that won't go into this pool after all. */
10107 for (fdel = last_barrier->next;
10108 fdel && fdel != ftmp;
10111 fdel->minipool->refcount--;
10112 fdel->minipool = NULL;
10115 ftmp = last_barrier;
10119 /* ftmp is the first fix that we can't fit into this pool and
10120 there are no natural barriers that we could use. Insert a
10121 new barrier in the code somewhere between the previous
10122 fix and this one, and arrange to jump around it. */
10123 HOST_WIDE_INT max_address;
10125 /* The last item on the list of fixes must be a barrier, so
10126 we can never run off the end of the list of fixes without
10127 last_barrier being set. */
10130 max_address = minipool_vector_head->max_address;
10131 /* Check that there isn't another fix that is in range that
10132 we couldn't fit into this pool because the pool was
10133 already too large: we need to put the pool before such an
10134 instruction. The pool itself may come just after the
10135 fix because create_fix_barrier also allows space for a
10136 jump instruction. */
10137 if (ftmp->address < max_address)
10138 max_address = ftmp->address + 1;
10140 last_barrier = create_fix_barrier (last_added_fix, max_address);
10143 assign_minipool_offsets (last_barrier);
10147 if (GET_CODE (ftmp->insn) != BARRIER
10148 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
10155 /* Scan over the fixes we have identified for this pool, fixing them
10156 up and adding the constants to the pool itself. */
10157 for (this_fix = fix; this_fix && ftmp != this_fix;
10158 this_fix = this_fix->next)
10159 if (GET_CODE (this_fix->insn) != BARRIER)
10162 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
10163 minipool_vector_label),
10164 this_fix->minipool->offset);
10165 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
10168 dump_minipool (last_barrier->insn);
10172 /* From now on we must synthesize any constants that we can't handle
10173 directly. This can happen if the RTL gets split during final
10174 instruction generation. */
10175 after_arm_reorg = 1;
10177 /* Free the minipool memory. */
10178 obstack_free (&minipool_obstack, minipool_startobj);
10181 /* Routines to output assembly language. */
10183 /* If the rtx is the correct value then return the string of the number.
10184 In this way we can ensure that valid double constants are generated even
10185 when cross compiling. */
10187 fp_immediate_constant (rtx x)
10192 if (!fp_consts_inited)
10195 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10196 for (i = 0; i < 8; i++)
10197 if (REAL_VALUES_EQUAL (r, values_fp[i]))
10198 return strings_fp[i];
10200 gcc_unreachable ();
10203 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
10204 static const char *
10205 fp_const_from_val (REAL_VALUE_TYPE *r)
10209 if (!fp_consts_inited)
10212 for (i = 0; i < 8; i++)
10213 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
10214 return strings_fp[i];
10216 gcc_unreachable ();
10219 /* Output the operands of a LDM/STM instruction to STREAM.
10220 MASK is the ARM register set mask of which only bits 0-15 are important.
10221 REG is the base register, either the frame pointer or the stack pointer,
10222 INSTR is the possibly suffixed load or store instruction.
10223 RFE is nonzero if the instruction should also copy spsr to cpsr. */
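/* For example (with illustrative operands), a MASK of 0x4030, REG as the
   stack pointer and an INSTR of "ldmfd\t%r!, " would print
   "ldmfd	sp!, {r4, r5, lr}".  */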
10226 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
10227 unsigned long mask, int rfe)
10230 bool not_first = FALSE;
10232 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
10233 fputc ('\t', stream);
10234 asm_fprintf (stream, instr, reg);
10235 fputc ('{', stream);
10237 for (i = 0; i <= LAST_ARM_REGNUM; i++)
10238 if (mask & (1 << i))
10241 fprintf (stream, ", ");
10243 asm_fprintf (stream, "%r", i);
10248 fprintf (stream, "}^\n");
10250 fprintf (stream, "}\n");
10254 /* Output a FLDMD instruction to STREAM.
10255 BASE is the register containing the address.
10256 REG and COUNT specify the register range.
10257 Extra registers may be added to avoid hardware bugs.
10259 We output FLDMD even for ARMv5 VFP implementations. Although
10260 FLDMD is technically not supported until ARMv6, it is believed
10261 that all VFP implementations support its use in this context. */
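/* For example (illustrative arguments), BASE = SP_REGNUM, REG = 8 and
   COUNT = 2 would print "fldmfdd	sp!, {d8, d9}" on a core where the
   ARM10 VFPr1 workaround below does not apply.  */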
10264 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
10268 /* Work around the ARM10 VFPr1 bug. */
10269 if (count == 2 && !arm_arch6)
10276 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
10277 load into multiple parts if we have to handle more than 16 registers. */
10280 vfp_output_fldmd (stream, base, reg, 16);
10281 vfp_output_fldmd (stream, base, reg + 16, count - 16);
10285 fputc ('\t', stream);
10286 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
10288 for (i = reg; i < reg + count; i++)
10291 fputs (", ", stream);
10292 asm_fprintf (stream, "d%d", i);
10294 fputs ("}\n", stream);
10299 /* Output the assembly for a store multiple. */
10302 vfp_output_fstmd (rtx * operands)
10309 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
10310 p = strlen (pattern);
10312 gcc_assert (GET_CODE (operands[1]) == REG);
10314 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
10315 for (i = 1; i < XVECLEN (operands[2], 0); i++)
10317 p += sprintf (&pattern[p], ", d%d", base + i);
10319 strcpy (&pattern[p], "}");
10321 output_asm_insn (pattern, operands);
10326 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
10327 number of bytes pushed. */
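/* For example (illustrative), on an ARMv6 core
   vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 2) pushes d8 and d9 and returns
   16, the number of bytes stored.  */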
10330 vfp_emit_fstmd (int base_reg, int count)
10337 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
10338 register pairs are stored by a store multiple insn. We avoid this
10339 by pushing an extra pair. */
10340 if (count == 2 && !arm_arch6)
10342 if (base_reg == LAST_VFP_REGNUM - 3)
10347 /* FSTMD may not store more than 16 doubleword registers at once. Split
10348 larger stores into multiple parts (up to a maximum of two, in practice). */
10353 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
10355 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
10356 saved += vfp_emit_fstmd (base_reg, 16);
10360 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
10361 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
10363 reg = gen_rtx_REG (DFmode, base_reg);
10366 XVECEXP (par, 0, 0)
10367 = gen_rtx_SET (VOIDmode,
10368 gen_frame_mem (BLKmode,
10369 gen_rtx_PRE_DEC (BLKmode,
10370 stack_pointer_rtx)),
10371 gen_rtx_UNSPEC (BLKmode,
10372 gen_rtvec (1, reg),
10373 UNSPEC_PUSH_MULT));
10375 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10376 plus_constant (stack_pointer_rtx, -(count * 8)));
10377 RTX_FRAME_RELATED_P (tmp) = 1;
10378 XVECEXP (dwarf, 0, 0) = tmp;
10380 tmp = gen_rtx_SET (VOIDmode,
10381 gen_frame_mem (DFmode, stack_pointer_rtx),
10383 RTX_FRAME_RELATED_P (tmp) = 1;
10384 XVECEXP (dwarf, 0, 1) = tmp;
10386 for (i = 1; i < count; i++)
10388 reg = gen_rtx_REG (DFmode, base_reg);
10390 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
10392 tmp = gen_rtx_SET (VOIDmode,
10393 gen_frame_mem (DFmode,
10394 plus_constant (stack_pointer_rtx,
10397 RTX_FRAME_RELATED_P (tmp) = 1;
10398 XVECEXP (dwarf, 0, i + 1) = tmp;
10401 par = emit_insn (par);
10402 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
10403 RTX_FRAME_RELATED_P (par) = 1;
10408 /* Emit a call instruction with pattern PAT. ADDR is the address of
10409 the call target. */
10412 arm_emit_call_insn (rtx pat, rtx addr)
10416 insn = emit_call_insn (pat);
10418 /* The PIC register is live on entry to VxWorks PIC PLT entries.
10419 If the call might use such an entry, add a use of the PIC register
10420 to the instruction's CALL_INSN_FUNCTION_USAGE. */
10421 if (TARGET_VXWORKS_RTP
10423 && GET_CODE (addr) == SYMBOL_REF
10424 && (SYMBOL_REF_DECL (addr)
10425 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
10426 : !SYMBOL_REF_LOCAL_P (addr)))
10428 require_pic_register ();
10429 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
10433 /* Output a 'call' insn. */
10435 output_call (rtx *operands)
10437 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
10439 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
10440 if (REGNO (operands[0]) == LR_REGNUM)
10442 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
10443 output_asm_insn ("mov%?\t%0, %|lr", operands);
10446 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10448 if (TARGET_INTERWORK || arm_arch4t)
10449 output_asm_insn ("bx%?\t%0", operands);
10451 output_asm_insn ("mov%?\t%|pc, %0", operands);
10456 /* Output a 'call' insn that is a reference in memory. */
10458 output_call_mem (rtx *operands)
10460 if (TARGET_INTERWORK && !arm_arch5)
10462 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10463 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10464 output_asm_insn ("bx%?\t%|ip", operands);
10466 else if (regno_use_in (LR_REGNUM, operands[0]))
10468 /* LR is used in the memory address. We load the address in the
10469 first instruction. It's safe to use IP as the target of the
10470 load since the call will kill it anyway. */
10471 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10473 output_asm_insn ("blx%?\t%|ip", operands);
10476 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10478 output_asm_insn ("bx%?\t%|ip", operands);
10480 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
10485 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10486 output_asm_insn ("ldr%?\t%|pc, %0", operands);
10493 /* Output a move from arm registers to an fpa register.
10494 OPERANDS[0] is an fpa register.
10495 OPERANDS[1] is the first register of an arm register pair. */
10497 output_mov_long_double_fpa_from_arm (rtx *operands)
10499 int arm_reg0 = REGNO (operands[1]);
10502 gcc_assert (arm_reg0 != IP_REGNUM);
10504 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10505 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10506 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10508 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10509 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
10514 /* Output a move from an fpa register to arm registers.
10515 OPERANDS[0] is the first register of an arm register pair.
10516 OPERANDS[1] is an fpa register. */
10518 output_mov_long_double_arm_from_fpa (rtx *operands)
10520 int arm_reg0 = REGNO (operands[0]);
10523 gcc_assert (arm_reg0 != IP_REGNUM);
10525 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10526 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10527 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10529 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
10530 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10534 /* Output a move from arm registers to arm registers of a long double.
10535 OPERANDS[0] is the destination.
10536 OPERANDS[1] is the source. */
10538 output_mov_long_double_arm_from_arm (rtx *operands)
10540 /* We have to be careful here because the two might overlap. */
10541 int dest_start = REGNO (operands[0]);
10542 int src_start = REGNO (operands[1]);
10546 if (dest_start < src_start)
10548 for (i = 0; i < 3; i++)
10550 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10551 ops[1] = gen_rtx_REG (SImode, src_start + i);
10552 output_asm_insn ("mov%?\t%0, %1", ops);
10557 for (i = 2; i >= 0; i--)
10559 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10560 ops[1] = gen_rtx_REG (SImode, src_start + i);
10561 output_asm_insn ("mov%?\t%0, %1", ops);
10569 /* Emit a MOVW/MOVT pair. */
10570 void arm_emit_movpair (rtx dest, rtx src)
10572 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
10573 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
10577 /* Output a move from arm registers to an fpa register.
10578 OPERANDS[0] is an fpa register.
10579 OPERANDS[1] is the first register of an arm register pair. */
10581 output_mov_double_fpa_from_arm (rtx *operands)
10583 int arm_reg0 = REGNO (operands[1]);
10586 gcc_assert (arm_reg0 != IP_REGNUM);
10588 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10589 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10590 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
10591 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
10595 /* Output a move from an fpa register to arm registers.
10596 OPERANDS[0] is the first register of an arm register pair.
10597 OPERANDS[1] is an fpa register. */
10599 output_mov_double_arm_from_fpa (rtx *operands)
10601 int arm_reg0 = REGNO (operands[0]);
10604 gcc_assert (arm_reg0 != IP_REGNUM);
10606 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10607 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10608 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
10609 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
10613 /* Output a move between double words.
10614 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
10615 or MEM<-REG and all MEMs must be offsettable addresses. */
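/* For instance (with illustrative register choices), a REG<-MEM move of a
   DImode value whose address is a plain register becomes
   "ldrd	r0, [r2]" on a TARGET_LDRD core (absent the Cortex-M3 errata
   workaround) and "ldmia	r2, {r0, r1}" otherwise.  */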
10617 output_move_double (rtx *operands)
10619 enum rtx_code code0 = GET_CODE (operands[0]);
10620 enum rtx_code code1 = GET_CODE (operands[1]);
10625 unsigned int reg0 = REGNO (operands[0]);
10627 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10629 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
10631 switch (GET_CODE (XEXP (operands[1], 0)))
10635 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
10636 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
10638 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10642 gcc_assert (TARGET_LDRD);
10643 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
10648 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
10650 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
10655 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
10657 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
10661 gcc_assert (TARGET_LDRD);
10662 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
10667 /* Autoincrement addressing modes should never have overlapping
10668 base and destination registers, and overlapping index registers
10669 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
10671 otherops[0] = operands[0];
10672 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
10673 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
10675 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
10677 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10679 /* Registers overlap so split out the increment. */
10680 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10681 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
10685 /* Use a single insn if we can.
10686 FIXME: IWMMXT allows offsets larger than ldrd can
10687 handle, fix these up with a pair of ldr. */
10689 || GET_CODE (otherops[2]) != CONST_INT
10690 || (INTVAL (otherops[2]) > -256
10691 && INTVAL (otherops[2]) < 256))
10692 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
10695 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10696 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10702 /* Use a single insn if we can.
10703 FIXME: IWMMXT allows offsets larger than ldrd can handle,
10704 fix these up with a pair of ldr. */
10706 || GET_CODE (otherops[2]) != CONST_INT
10707 || (INTVAL (otherops[2]) > -256
10708 && INTVAL (otherops[2]) < 256))
10709 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
10712 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10713 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10720 /* We might be able to use ldrd %0, %1 here. However the range is
10721 different to ldr/adr, and it is broken on some ARMv7-M
10722 implementations. */
10723 /* Use the second register of the pair to avoid problematic conditional execution. */
10725 otherops[1] = operands[1];
10726 output_asm_insn ("adr%?\t%0, %1", otherops);
10727 operands[1] = otherops[0];
10729 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10731 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
10734 /* ??? This needs checking for thumb2. */
10736 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
10737 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
10739 otherops[0] = operands[0];
10740 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
10741 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
10743 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10745 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10747 switch ((int) INTVAL (otherops[2]))
10750 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10755 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10760 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10764 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
10765 operands[1] = otherops[0];
10767 && (GET_CODE (otherops[2]) == REG
10769 || (GET_CODE (otherops[2]) == CONST_INT
10770 && INTVAL (otherops[2]) > -256
10771 && INTVAL (otherops[2]) < 256)))
10773 if (reg_overlap_mentioned_p (operands[0],
10777 /* Swap base and index registers over to
10778 avoid a conflict. */
10780 otherops[1] = otherops[2];
10783 /* If both registers conflict, it will usually
10784 have been fixed by a splitter. */
10785 if (reg_overlap_mentioned_p (operands[0], otherops[2])
10786 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
10788 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10789 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10793 otherops[0] = operands[0];
10794 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10799 if (GET_CODE (otherops[2]) == CONST_INT)
10801 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10802 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10804 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10807 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10810 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10813 return "ldr%(d%)\t%0, [%1]";
10815 return "ldm%(ia%)\t%1, %M0";
10819 otherops[1] = adjust_address (operands[1], SImode, 4);
10820 /* Take care of overlapping base/data reg. */
10821 if (reg_mentioned_p (operands[0], operands[1]))
10823 output_asm_insn ("ldr%?\t%0, %1", otherops);
10824 output_asm_insn ("ldr%?\t%0, %1", operands);
10828 output_asm_insn ("ldr%?\t%0, %1", operands);
10829 output_asm_insn ("ldr%?\t%0, %1", otherops);
10836 /* Constraints should ensure this. */
10837 gcc_assert (code0 == MEM && code1 == REG);
10838 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10840 switch (GET_CODE (XEXP (operands[0], 0)))
10844 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10846 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10850 gcc_assert (TARGET_LDRD);
10851 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10856 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10858 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10863 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10865 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10869 gcc_assert (TARGET_LDRD);
10870 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10875 otherops[0] = operands[1];
10876 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10877 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10879 /* IWMMXT allows offsets larger than ldrd can handle,
10880 fix these up with a pair of ldr. */
10882 && GET_CODE (otherops[2]) == CONST_INT
10883 && (INTVAL (otherops[2]) <= -256
10884 || INTVAL (otherops[2]) >= 256))
10886 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10888 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10889 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10893 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10894 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10897 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10898 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10900 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10904 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10905 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10907 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10910 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10916 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10922 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10927 && (GET_CODE (otherops[2]) == REG
10929 || (GET_CODE (otherops[2]) == CONST_INT
10930 && INTVAL (otherops[2]) > -256
10931 && INTVAL (otherops[2]) < 256)))
10933 otherops[0] = operands[1];
10934 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10935 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10941 otherops[0] = adjust_address (operands[0], SImode, 4);
10942 otherops[1] = operands[1];
10943 output_asm_insn ("str%?\t%1, %0", operands);
10944 output_asm_insn ("str%?\t%H1, %0", otherops);
10951 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10952 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
10955 output_move_quad (rtx *operands)
10957 if (REG_P (operands[0]))
10959 /* Load, or reg->reg move. */
10961 if (MEM_P (operands[1]))
10963 switch (GET_CODE (XEXP (operands[1], 0)))
10966 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10971 output_asm_insn ("adr%?\t%0, %1", operands);
10972 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10976 gcc_unreachable ();
10984 gcc_assert (REG_P (operands[1]));
10986 dest = REGNO (operands[0]);
10987 src = REGNO (operands[1]);
10989 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
10992 for (i = 0; i < 4; i++)
10994 ops[0] = gen_rtx_REG (SImode, dest + i);
10995 ops[1] = gen_rtx_REG (SImode, src + i);
10996 output_asm_insn ("mov%?\t%0, %1", ops);
10999 for (i = 3; i >= 0; i--)
11001 ops[0] = gen_rtx_REG (SImode, dest + i);
11002 ops[1] = gen_rtx_REG (SImode, src + i);
11003 output_asm_insn ("mov%?\t%0, %1", ops);
11009 gcc_assert (MEM_P (operands[0]));
11010 gcc_assert (REG_P (operands[1]));
11011 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
11013 switch (GET_CODE (XEXP (operands[0], 0)))
11016 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
11020 gcc_unreachable ();
11027 /* Output a VFP load or store instruction. */
11030 output_move_vfp (rtx *operands)
11032 rtx reg, mem, addr, ops[2];
11033 int load = REG_P (operands[0]);
11034 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
11035 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
11038 enum machine_mode mode;
11040 reg = operands[!load];
11041 mem = operands[load];
11043 mode = GET_MODE (reg);
11045 gcc_assert (REG_P (reg));
11046 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
11047 gcc_assert (mode == SFmode
11051 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
11052 gcc_assert (MEM_P (mem));
11054 addr = XEXP (mem, 0);
11056 switch (GET_CODE (addr))
11059 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
11060 ops[0] = XEXP (addr, 0);
11065 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
11066 ops[0] = XEXP (addr, 0);
11071 templ = "f%s%c%%?\t%%%s0, %%1%s";
11077 sprintf (buff, templ,
11078 load ? "ld" : "st",
11081 integer_p ? "\t%@ int" : "");
11082 output_asm_insn (buff, ops);
11087 /* Output a Neon quad-word load or store, or a load or store for
11088 larger structure modes.
11090 WARNING: The ordering of elements is weird in big-endian mode,
11091 because we use VSTM, as required by the EABI. GCC RTL defines
11092 element ordering based on in-memory order. This can differ
11093 from the architectural ordering of elements within a NEON register.
11094 The intrinsics defined in arm_neon.h use the NEON register element
11095 ordering, not the GCC RTL element ordering.
11097 For example, the in-memory ordering of a big-endian quadword
11098 vector with 16-bit elements when stored from register pair {d0,d1}
11099 will be (lowest address first, d0[N] is NEON register element N):
11101 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
11103 When necessary, quadword registers (dN, dN+1) are moved to ARM
11104 registers from rN in the order:
11106 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
11108 So that STM/LDM can be used on vectors in ARM registers, and the
11109 same memory layout will result as if VSTM/VLDM were used. */
11112 output_move_neon (rtx *operands)
11114 rtx reg, mem, addr, ops[2];
11115 int regno, load = REG_P (operands[0]);
11118 enum machine_mode mode;
11120 reg = operands[!load];
11121 mem = operands[load];
11123 mode = GET_MODE (reg);
11125 gcc_assert (REG_P (reg));
11126 regno = REGNO (reg);
11127 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
11128 || NEON_REGNO_OK_FOR_QUAD (regno));
11129 gcc_assert (VALID_NEON_DREG_MODE (mode)
11130 || VALID_NEON_QREG_MODE (mode)
11131 || VALID_NEON_STRUCT_MODE (mode));
11132 gcc_assert (MEM_P (mem));
11134 addr = XEXP (mem, 0);
11136 /* Strip off const from addresses like (const (plus (...))). */
11137 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
11138 addr = XEXP (addr, 0);
11140 switch (GET_CODE (addr))
11143 templ = "v%smia%%?\t%%0!, %%h1";
11144 ops[0] = XEXP (addr, 0);
11149 /* FIXME: We should be using vld1/vst1 here in BE mode? */
11150 templ = "v%smdb%%?\t%%0!, %%h1";
11151 ops[0] = XEXP (addr, 0);
11156 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
11157 gcc_unreachable ();
11162 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
11165 for (i = 0; i < nregs; i++)
11167 /* We're only using DImode here because it's a convenient size. */
11168 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
11169 ops[1] = adjust_address (mem, SImode, 8 * i);
11170 if (reg_overlap_mentioned_p (ops[0], mem))
11172 gcc_assert (overlap == -1);
11177 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11178 output_asm_insn (buff, ops);
11183 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
11184 ops[1] = adjust_address (mem, SImode, 8 * overlap);
11185 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11186 output_asm_insn (buff, ops);
11193 templ = "v%smia%%?\t%%m0, %%h1";
11198 sprintf (buff, templ, load ? "ld" : "st");
11199 output_asm_insn (buff, ops);
11204 /* Output an ADD r, s, #n where n may be too big for one instruction.
11205 If adding zero to one register, output nothing. */
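/* For example (illustrative registers), n = 0x10f cannot be encoded as a
   single immediate, so the addition is split into two valid immediates:
	add	r0, r1, #15
	add	r0, r0, #256  */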
11207 output_add_immediate (rtx *operands)
11209 HOST_WIDE_INT n = INTVAL (operands[2]);
11211 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
11214 output_multi_immediate (operands,
11215 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
11218 output_multi_immediate (operands,
11219 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
11226 /* Output a multiple immediate operation.
11227 OPERANDS is the vector of operands referred to in the output patterns.
11228 INSTR1 is the output pattern to use for the first constant.
11229 INSTR2 is the output pattern to use for subsequent constants.
11230 IMMED_OP is the index of the constant slot in OPERANDS.
11231 N is the constant value. */
11232 static const char *
11233 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
11234 int immed_op, HOST_WIDE_INT n)
11236 #if HOST_BITS_PER_WIDE_INT > 32
11242 /* Quick and easy output. */
11243 operands[immed_op] = const0_rtx;
11244 output_asm_insn (instr1, operands);
11249 const char * instr = instr1;
11251 /* Note that n is never zero here (which would give no output). */
11252 for (i = 0; i < 32; i += 2)
11256 operands[immed_op] = GEN_INT (n & (255 << i));
11257 output_asm_insn (instr, operands);
11267 /* Return the name of a shifter operation. */
11268 static const char *
11269 arm_shift_nmem (enum rtx_code code)
11274 return ARM_LSL_NAME;
11290 /* Return the appropriate ARM instruction for the operation code.
11291 The returned result should not be overwritten. OP is the rtx of the
11292 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator was shifted. */
11295 arithmetic_instr (rtx op, int shift_first_arg)
11297 switch (GET_CODE (op))
11303 return shift_first_arg ? "rsb" : "sub";
11318 return arm_shift_nmem (GET_CODE (op));
11321 gcc_unreachable ();
11325 /* Ensure valid constant shifts and return the appropriate shift mnemonic
11326 for the operation code. The returned result should not be overwritten.
11327 OP is the rtx code of the shift.
11328 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant shift amount otherwise. */
11330 static const char *
11331 shift_op (rtx op, HOST_WIDE_INT *amountp)
11334 enum rtx_code code = GET_CODE (op);
11336 switch (GET_CODE (XEXP (op, 1)))
11344 *amountp = INTVAL (XEXP (op, 1));
11348 gcc_unreachable ();
11354 gcc_assert (*amountp != -1);
11355 *amountp = 32 - *amountp;
11358 /* Fall through. */
11364 mnem = arm_shift_nmem (code);
11368 /* We never have to worry about the amount being other than a
11369 power of 2, since this case can never be reloaded from a reg. */
11370 gcc_assert (*amountp != -1);
11371 *amountp = int_log2 (*amountp);
11372 return ARM_LSL_NAME;
11375 gcc_unreachable ();
11378 if (*amountp != -1)
11380 /* This is not 100% correct, but follows from the desire to merge
11381 multiplication by a power of 2 with the recognizer for a
11382 shift. >=32 is not a valid shift for "lsl", so we must try and
11383 output a shift that produces the correct arithmetical result.
11384 Using lsr #32 is identical except for the fact that the carry bit
11385 is not set correctly if we set the flags; but we never use the
11386 carry bit from such an operation, so we can ignore that. */
11387 if (code == ROTATERT)
11388 /* Rotate is just modulo 32. */
11390 else if (*amountp != (*amountp & 31))
11392 if (code == ASHIFT)
11397 /* Shifts of 0 are no-ops. */
11405 /* Obtain the shift from the POWER of two. */
11407 static HOST_WIDE_INT
11408 int_log2 (HOST_WIDE_INT power)
11410 HOST_WIDE_INT shift = 0;
11412 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
11414 gcc_assert (shift <= 31);
11421 /* Output a .ascii pseudo-op, keeping track of lengths. This is
11422 because /bin/as is horribly restrictive. The judgement about
11423 whether or not each character is 'printable' (and can be output as
11424 is) or not (and must be printed with an octal escape) must be made
11425 with reference to the *host* character set -- the situation is
11426 similar to that discussed in the comments above pp_c_char in
11427 c-pretty-print.c. */
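/* For example (illustrative), the three characters a, double quote, b
   followed by a newline would be emitted as:
	.ascii	"a\"b\012"  */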
11429 #define MAX_ASCII_LEN 51
11432 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
11435 int len_so_far = 0;
11437 fputs ("\t.ascii\t\"", stream);
11439 for (i = 0; i < len; i++)
11443 if (len_so_far >= MAX_ASCII_LEN)
11445 fputs ("\"\n\t.ascii\t\"", stream);
11451 if (c == '\\' || c == '\"')
11453 putc ('\\', stream);
11461 fprintf (stream, "\\%03o", c);
11466 fputs ("\"\n", stream);
11469 /* Compute the register save mask for registers 0 through 12
11470 inclusive. This code is used by arm_compute_save_reg_mask. */
11472 static unsigned long
11473 arm_compute_save_reg0_reg12_mask (void)
11475 unsigned long func_type = arm_current_func_type ();
11476 unsigned long save_reg_mask = 0;
11479 if (IS_INTERRUPT (func_type))
11481 unsigned int max_reg;
11482 /* Interrupt functions must not corrupt any registers,
11483 even call clobbered ones. If this is a leaf function
11484 we can just examine the registers used by the RTL, but
11485 otherwise we have to assume that whatever function is
11486 called might clobber anything, and so we have to save
11487 all the call-clobbered registers as well. */
11488 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
11489 /* FIQ handlers have registers r8 - r12 banked, so
11490 we only need to check r0 - r7. Normal ISRs only
11491 bank r14 and r15, so we must check up to r12.
11492 r13 is the stack pointer which is always preserved,
11493 so we do not need to consider it here. */
11498 for (reg = 0; reg <= max_reg; reg++)
11499 if (df_regs_ever_live_p (reg)
11500 || (! current_function_is_leaf && call_used_regs[reg]))
11501 save_reg_mask |= (1 << reg);
11503 /* Also save the pic base register if necessary. */
11505 && !TARGET_SINGLE_PIC_BASE
11506 && arm_pic_register != INVALID_REGNUM
11507 && crtl->uses_pic_offset_table)
11508 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11512 /* In the normal case we only need to save those registers
11513 which are call saved and which are used by this function. */
11514 for (reg = 0; reg <= 11; reg++)
11515 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
11516 save_reg_mask |= (1 << reg);
11518 /* Handle the frame pointer as a special case. */
11519 if (frame_pointer_needed)
11520 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
11522 /* If we aren't loading the PIC register,
11523 don't stack it even though it may be live. */
11525 && !TARGET_SINGLE_PIC_BASE
11526 && arm_pic_register != INVALID_REGNUM
11527 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
11528 || crtl->uses_pic_offset_table))
11529 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11531 /* The prologue will copy SP into R0, so save it. */
11532 if (IS_STACKALIGN (func_type))
11533 save_reg_mask |= 1;
11536 /* Save registers so the exception handler can modify them. */
11537 if (crtl->calls_eh_return)
11543 reg = EH_RETURN_DATA_REGNO (i);
11544 if (reg == INVALID_REGNUM)
11546 save_reg_mask |= 1 << reg;
11550 return save_reg_mask;
11554 /* Compute the number of bytes used to store the static chain register on the
11555 stack, above the stack frame. We need to know this accurately to get the
11556 alignment of the rest of the stack frame correct. */
11558 static int arm_compute_static_chain_stack_bytes (void)
11560 unsigned long func_type = arm_current_func_type ();
11561 int static_chain_stack_bytes = 0;
11563 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
11564 IS_NESTED (func_type) &&
11565 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
11566 static_chain_stack_bytes = 4;
11568 return static_chain_stack_bytes;
11572 /* Compute a bit mask of which registers need to be
11573 saved on the stack for the current function.
11574 This is used by arm_get_frame_offsets, which may add extra registers. */
11576 static unsigned long
11577 arm_compute_save_reg_mask (void)
11579 unsigned int save_reg_mask = 0;
11580 unsigned long func_type = arm_current_func_type ();
11583 if (IS_NAKED (func_type))
11584 /* This should never really happen. */
11587 /* If we are creating a stack frame, then we must save the frame pointer,
11588 IP (which will hold the old stack pointer), LR and the PC. */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);
11596 /* Volatile functions do not return, so there
11597 is no need to save any other registers. */
11598 if (IS_VOLATILE (func_type))
11599 return save_reg_mask;
11601 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
11603 /* Decide if we need to save the link register.
11604 Interrupt routines have their own banked link register,
11605 so they never need to save it.
11606 Otherwise if we do not use the link register we do not need to save
11607 it. If we are pushing other registers onto the stack however, we
11608 can save an instruction in the epilogue by pushing the link register
11609 now and then popping it back into the PC. This incurs extra memory
11610 accesses though, so we only do it when optimizing for size, and only
11611 if we know that we will not need a fancy return sequence. */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;
11619 if (cfun->machine->lr_save_eliminated)
11620 save_reg_mask &= ~ (1 << LR_REGNUM);
  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size
			   + arm_compute_static_chain_stack_bytes ())) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }
11650 /* We may need to push an additional register for use initializing the
11651 PIC base register. */
11652 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
11653 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
11655 reg = thumb_find_work_register (1 << 4);
11656 if (!call_used_regs[reg])
11657 save_reg_mask |= (1 << reg);
11660 return save_reg_mask;
11664 /* Compute a bit mask of which registers need to be
11665 saved on the stack for the current function. */
11666 static unsigned long
11667 thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  int reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11683 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
11684 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
11685 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
11687 /* LR will also be pushed if any lo regs are pushed. */
11688 if (mask & 0xff || thumb_force_lr_save ())
11689 mask |= (1 << LR_REGNUM);
11691 /* Make sure we have a low work register if we need one.
11692 We will need one if we are going to push a high register,
11693 but we are not currently intending to push a low register. */
11694 if ((mask & 0xff) == 0
11695 && ((mask & 0x0f00) || TARGET_BACKTRACE))
11697 /* Use thumb_find_work_register to choose which register
11698 we will use. If the register is live then we will
11699 have to push it. Use LAST_LO_REGNUM as our fallback
11700 choice for the register to select. */
11701 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
11702 /* Make sure the register returned by thumb_find_work_register is
11703 not part of the return value. */
11704 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
11705 reg = LAST_LO_REGNUM;
      if (! call_used_regs[reg])
	mask |= 1 << reg;
    }
11711 /* The 504 below is 8 bytes less than 512 because there are two possible
11712 alignment words. We can't tell here if they will be present or not so we
11713 have to play it safe and assume that they are. */
  if ((CALLER_INTERWORKING_SLOT_SIZE
       + ROUND_UP_WORD (get_frame_size ())
       + crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
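/* Worked example (illustrative only): with no interworking slot, no
   outgoing args and a 600-byte frame, the test above computes
   0 + ROUND_UP_WORD (600) + 0 = 600 >= 504, so a low register is
   reserved for the stack decrement even though the two possible
   alignment words may later turn out to be absent.  */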
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
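/* Worked example (illustrative only): if d8 and d9 are the only live
   call-saved VFP registers on a pre-v6 core, the run length of 2 is
   padded to 3 by the ARM10 VFPr1 workaround above, reserving 24 bytes
   rather than 16; on arm_arch6 targets the same function reserves 16.  */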
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  */
const char *
output_return_instruction (rtx operand, int really_return, int reverse)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
11788 arm_stack_offsets *offsets;
11790 func_type = arm_current_func_type ();
  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }
11815 gcc_assert (!cfun->calls_alloca || really_return);
11817 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11819 cfun->machine->return_used_this_function = 1;
11821 offsets = arm_get_frame_offsets ();
11822 live_regs_mask = offsets->saved_regs_mask;
11824 if (live_regs_mask)
11826 const char * return_reg;
      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];
11837 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |= (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}
      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;
11876 /* Generate the load multiple instruction to restore the
11877 registers. Note we can get here, even if
11878 frame_pointer_needed is true, but only if sp already
11879 points to the base of the saved core registers. */
11880 if (live_regs_mask & (1 << SP_REGNUM))
11882 unsigned HOST_WIDE_INT stack_adjust;
11884 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11885 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;
		  sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = FALSE;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}
11932 output_asm_insn (instr, & operand);
      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  really_return = 0;
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
11952 /* ??? This is wrong for unified assembly syntax. */
11953 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
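/* Illustrative output (a sketch, assuming a simple ARM-mode function
   that saved only {r4, lr} and has no special exit requirements): the
   code above folds the return into the register restore and emits

	ldmfd	sp!, {r4, pc}

   whereas an interworking return would instead restore into lr and
   finish with a separate "bx lr".  */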
11980 /* Write the function name into the code section, directly preceding
11981 the function prologue.
   Code will be output similar to this:

     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4
   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is held in the low 24 bits (pc[-3] & 0x00ffffff).

   We assume that pc is declared as a pointer to an unsigned long.
12002 It is of no benefit to output the function name if we are assembling
12003 a leaf function. These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length = strlen (name) + 1;
12014 alignlength = ROUND_UP_WORD (length);
12016 ASM_OUTPUT_ASCII (stream, name, length);
12017 ASM_OUTPUT_ALIGN (stream, 2);
12018 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
12019 assemble_aligned_integer (UNITS_PER_WORD, x);
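/* Illustrative only -- a hypothetical backtracer (not part of GCC) could
   recover the embedded name like this, assuming pc is a pointer to
   unsigned long as described above:

     unsigned long word = pc[-3];
     if ((word & 0xff000000) == 0xff000000)
       {
	 unsigned long len = word & 0x00ffffff;
	 const char *name = (const char *) &pc[-3] - len;
	 ...
       }
*/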
12022 /* Place some comments into the assembler stream
12023 describing the current function. */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  if (TARGET_THUMB1)
    {
      thumb1_output_function_prologue (f, frame_size);
      return;
    }
12035 /* Sanity check. */
12036 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
12038 func_type = arm_current_func_type ();
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;

    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;

    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }
12059 if (IS_NAKED (func_type))
12060 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
12062 if (IS_VOLATILE (func_type))
12063 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
12065 if (IS_NESTED (func_type))
12066 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
12067 if (IS_STACKALIGN (func_type))
12068 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);
12074 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
12075 frame_pointer_needed,
12076 cfun->machine->uses_anonymous_args);
12078 if (cfun->machine->lr_save_eliminated)
12079 asm_fprintf (f, "\t%@ link register save eliminated.\n");
12081 if (crtl->calls_eh_return)
    asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
}
const char *
arm_output_epilogue (rtx sibling)
{
  int reg;
  unsigned long saved_regs_mask;
  unsigned long func_type;
  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
     frame that is $fp + 4 for a non-variadic function.  */
  int floats_offset = 0;
  rtx operands[3];
  FILE * f = asm_out_file;
  unsigned int lrm_count = 0;
  int really_return = (sibling == NULL);
  int start_reg;
  arm_stack_offsets *offsets;
  /* If we have already generated the return instruction
     then it is futile to generate anything else.  */
  if (use_return_insn (FALSE, sibling)
      && (cfun->machine->return_used_this_function != 0))
    return "";
12108 func_type = arm_current_func_type ();
  if (IS_NAKED (func_type))
    /* Naked functions don't have epilogues.  */
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      rtx op;

      /* A volatile function should never return.  Call abort.  */
      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
      assemble_external_libcall (op);
      output_asm_insn ("bl\t%a0", &op);

      return "";
    }
12126 /* If we are throwing an exception, then we really must be doing a
12127 return, so we can't tail-call. */
12128 gcc_assert (!crtl->calls_eh_return || really_return);
12130 offsets = arm_get_frame_offsets ();
12131 saved_regs_mask = offsets->saved_regs_mask;
  if (TARGET_IWMMXT)
    lrm_count = bit_count (saved_regs_mask);
12136 floats_offset = offsets->saved_args;
12137 /* Compute how far away the floats will be. */
12138 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
12139 if (saved_regs_mask & (1 << reg))
12140 floats_offset += 4;
12142 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12144 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
12145 int vfp_offset = offsets->frame;
12147 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12149 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12150 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12152 floats_offset += 12;
12153 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
12154 reg, FP_REGNUM, floats_offset - vfp_offset);
12159 start_reg = LAST_FPA_REGNUM;
12161 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12163 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12165 floats_offset += 12;
12167 /* We can't unstack more than four registers at once. */
12168 if (start_reg - reg == 3)
12170 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
12171 reg, FP_REGNUM, floats_offset - vfp_offset);
12172 start_reg = reg - 1;
12177 if (reg != start_reg)
12178 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12179 reg + 1, start_reg - reg,
12180 FP_REGNUM, floats_offset - vfp_offset);
12181 start_reg = reg - 1;
12185 /* Just in case the last register checked also needs unstacking. */
12186 if (reg != start_reg)
12187 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12188 reg + 1, start_reg - reg,
12189 FP_REGNUM, floats_offset - vfp_offset);
      if (TARGET_HARD_FLOAT && TARGET_VFP)
	{
	  int saved_size;

	  /* The fldmd insns do not have base+offset addressing
	     modes, so we use IP to hold the address.  */
	  saved_size = arm_get_vfp_saved_size ();
12200 if (saved_size > 0)
12202 floats_offset += saved_size;
12203 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
12204 FP_REGNUM, floats_offset - vfp_offset);
12206 start_reg = FIRST_VFP_REGNUM;
12207 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12209 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12210 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12212 if (start_reg != reg)
12213 vfp_output_fldmd (f, IP_REGNUM,
12214 (start_reg - FIRST_VFP_REGNUM) / 2,
12215 (reg - start_reg) / 2);
12216 start_reg = reg + 2;
12219 if (start_reg != reg)
12220 vfp_output_fldmd (f, IP_REGNUM,
12221 (start_reg - FIRST_VFP_REGNUM) / 2,
12222 (reg - start_reg) / 2);
12227 /* The frame pointer is guaranteed to be non-double-word aligned.
12228 This is because it is set to (old_stack_pointer - 4) and the
12229 old_stack_pointer was double word aligned. Thus the offset to
12230 the iWMMXt registers to be loaded must also be non-double-word
12231 sized, so that the resultant address *is* double-word aligned.
12232 We can ignore floats_offset since that was already included in
12233 the live_regs_mask. */
12234 lrm_count += (lrm_count % 2 ? 2 : 1);
12236 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12237 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12239 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
			 reg, FP_REGNUM, lrm_count * 4);
	    lrm_count += 2;
	  }
12245 /* saved_regs_mask should contain the IP, which at the time of stack
12246 frame generation actually contains the old stack pointer. So a
12247 quick way to unwind the stack is just pop the IP register directly
12248 into the stack pointer. */
12249 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
12250 saved_regs_mask &= ~ (1 << IP_REGNUM);
12251 saved_regs_mask |= (1 << SP_REGNUM);
12253 /* There are two registers left in saved_regs_mask - LR and PC. We
12254 only need to restore the LR register (the return address), but to
12255 save time we can load it directly into the PC, unless we need a
12256 special function exit sequence, or we are not really returning. */
      if (really_return
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return)
	/* Delete the LR from the register mask, so that the LR on
	   the stack is loaded into the PC in the register mask.  */
	saved_regs_mask &= ~ (1 << LR_REGNUM);
      else
	saved_regs_mask &= ~ (1 << PC_REGNUM);
12266 /* We must use SP as the base register, because SP is one of the
12267 registers being restored. If an interrupt or page fault
12268 happens in the ldm instruction, the SP might or might not
12269 have been restored. That would be bad, as then SP will no
12270 longer indicate the safe area of stack, and we can get stack
12271 corruption. Using SP as the base register means that it will
12272 be reset correctly to the original value, should an interrupt
12273 occur. If the stack pointer already points at the right
12274 place, then omit the subtraction. */
12275 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
12276 || cfun->calls_alloca)
12277 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
12278 4 * bit_count (saved_regs_mask));
12279 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
12281 if (IS_INTERRUPT (func_type))
12282 /* Interrupt handlers will have pushed the
12283 IP onto the stack, so restore it now. */
12284 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
  else
    {
      /* This branch is executed for ARM mode (non-APCS frames) and
	 Thumb-2 mode.  Frame layout is essentially the same for those
	 cases, except that in ARM mode the frame pointer points to the
	 first saved register, while in Thumb-2 mode the frame pointer
	 points to the last saved register.

	 It is possible to make the frame pointer point to the last saved
	 register in both cases, and remove some conditionals below.
	 That would mean the fp setup in the prologue is just
	 "mov fp, sp" and the sp restore in the epilogue is just
	 "mov sp, fp", whereas now we have to use add/sub in those cases.
	 However, the value of that would be marginal, as both mov and
	 add/sub are 32-bit in ARM mode, and it would require extra
	 conditionals in arm_expand_prologue to distinguish the
	 ARM-apcs-frame case (where the frame pointer is required to
	 point at the first register) from the ARM-non-apcs-frame case.
	 Therefore, such a change is postponed until a real need arises.  */
      unsigned HOST_WIDE_INT amount;
      int rfe;
12307 /* Restore stack pointer if necessary. */
12308 if (TARGET_ARM && frame_pointer_needed)
12310 operands[0] = stack_pointer_rtx;
12311 operands[1] = hard_frame_pointer_rtx;
12313 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
12314 output_add_immediate (operands);
12318 if (frame_pointer_needed)
	    {
	      /* For Thumb-2, restore sp from the frame pointer.
		 Operand restrictions mean we have to increment FP, then copy
		 to SP.  */
	      amount = offsets->locals_base - offsets->saved_regs;
	      operands[0] = hard_frame_pointer_rtx;
	    }
	  else
	    {
12328 unsigned long count;
12329 operands[0] = stack_pointer_rtx;
12330 amount = offsets->outgoing_args - offsets->saved_regs;
12331 /* pop call clobbered registers if it avoids a
12332 separate stack adjustment. */
12333 count = offsets->saved_regs - offsets->saved_args;
	      if (optimize_size
		  && count != 0
		  && !crtl->calls_eh_return
		  && bit_count (saved_regs_mask) * 4 == count
		  && !IS_INTERRUPT (func_type)
		  && !crtl->tail_call_emit)
		{
		  unsigned long mask;
		  mask = (1 << (arm_size_return_regs () / 4)) - 1;
		  mask ^= 0xf;
		  mask &= ~saved_regs_mask;
		  reg = 0;
		  while (bit_count (mask) * 4 > amount)
		    {
		      while ((mask & (1 << reg)) == 0)
			reg++;
		      mask &= ~(1 << reg);
		    }
		  if (bit_count (mask) * 4 == amount)
		    {
		      amount = 0;
		      saved_regs_mask |= mask;
		    }
		}
	    }

	  if (amount)
	    {
	      operands[1] = operands[0];
12362 operands[2] = GEN_INT (amount);
12363 output_add_immediate (operands);
12365 if (frame_pointer_needed)
12366 asm_fprintf (f, "\tmov\t%r, %r\n",
12367 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
12370 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12372 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12373 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	    asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
			 reg, SP_REGNUM);
      else
	{
	  start_reg = FIRST_FPA_REGNUM;
12381 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12383 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12385 if (reg - start_reg == 3)
12387 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
12388 start_reg, SP_REGNUM);
12389 start_reg = reg + 1;
	      else
		{
		  if (reg != start_reg)
		    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
				 start_reg, reg - start_reg,
				 SP_REGNUM);
		  start_reg = reg + 1;
		}
12403 /* Just in case the last register checked also needs unstacking. */
12404 if (reg != start_reg)
12405 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12406 start_reg, reg - start_reg, SP_REGNUM);
12409 if (TARGET_HARD_FLOAT && TARGET_VFP)
12411 start_reg = FIRST_VFP_REGNUM;
12412 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12414 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12415 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12417 if (start_reg != reg)
12418 vfp_output_fldmd (f, SP_REGNUM,
12419 (start_reg - FIRST_VFP_REGNUM) / 2,
12420 (reg - start_reg) / 2);
12421 start_reg = reg + 2;
12424 if (start_reg != reg)
12425 vfp_output_fldmd (f, SP_REGNUM,
12426 (start_reg - FIRST_VFP_REGNUM) / 2,
12427 (reg - start_reg) / 2);
12430 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
12431 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12432 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
12434 /* If we can, restore the LR into the PC. */
12435 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
12436 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
12437 && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
12440 && saved_regs_mask & (1 << LR_REGNUM)
12441 && !crtl->calls_eh_return)
12443 saved_regs_mask &= ~ (1 << LR_REGNUM);
12444 saved_regs_mask |= (1 << PC_REGNUM);
	  rfe = IS_INTERRUPT (func_type);
	}
      else
	rfe = 0;
      /* Load the registers off the stack.  If we only have one register
	 to load use the LDR instruction - it is faster.  For Thumb-2
	 always use pop and the assembler will pick the best instruction.  */
12453 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
12454 && !IS_INTERRUPT(func_type))
12456 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
12458 else if (saved_regs_mask)
12460 if (saved_regs_mask & (1 << SP_REGNUM))
12461 /* Note - write back to the stack register is not enabled
12462 (i.e. "ldmfd sp!..."). We know that the stack pointer is
12463 in the list of registers and if we add writeback the
12464 instruction becomes UNPREDICTABLE. */
	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else if (TARGET_ARM)
	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else
	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
12474 if (crtl->args.pretend_args_size)
12476 /* Unwind the pre-pushed regs. */
12477 operands[0] = operands[1] = stack_pointer_rtx;
12478 operands[2] = GEN_INT (crtl->args.pretend_args_size);
12479 output_add_immediate (operands);
  /* We may have already restored PC directly from the stack.  */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
    return "";
12487 /* Stack adjustment for exception handler. */
12488 if (crtl->calls_eh_return)
12489 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
12490 ARM_EH_STACKADJ_REGNUM);
12492 /* Generate the return instruction. */
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    case ARM_FT_ISR:
    case ARM_FT_FIQ:
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      break;

    default:
      if (IS_STACKALIGN (func_type))
	{
	  /* See comment in arm_expand_prologue.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
	}
      if (arm_arch5 || arm_arch4t)
	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      else
	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;
    }

  return "";
}
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
12538 rtx label = cfun->machine->call_via[regno];
	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
12543 targetm.asm_out.internal_label (asm_out_file, "L",
12544 CODE_LABEL_NUMBER (label));
12545 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
12549 /* ??? Probably not safe to set this here, since it assumes that a
12550 function will be emitted as assembly immediately after we generate
12551 RTL for it. This does not happen for inline functions. */
12552 cfun->machine->return_used_this_function = 0;
12554 else /* TARGET_32BIT */
12556 /* We need to take into account any stack-frame rounding. */
12557 offsets = arm_get_frame_offsets ();
12559 gcc_assert (!use_return_insn (FALSE, NULL)
12560 || (cfun->machine->return_used_this_function != 0)
12561 || offsets->saved_regs == offsets->outgoing_args
12562 || frame_pointer_needed);
12564 /* Reset the ARM-specific per-function variables. */
12565 after_arm_reorg = 0;
12569 /* Generate and emit an insn that we will recognize as a push_multi.
12570 Unfortunately, since this insn does not reflect very well the actual
12571 semantics of the operation, we need to annotate the insn for the benefit
12572 of DWARF2 frame unwind information. */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;
12595 /* For the body of the insn we are going to generate an UNSPEC in
12596 parallel with several USEs. This allows the insn to be recognized
12597 by the push_multi pattern in the arm.md file. The insn looks
12598 something like this:
       (parallel [
	   (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
	   (use (reg:SI 11 fp))
	   (use (reg:SI 12 ip))
	   (use (reg:SI 14 lr))
	   (use (reg:SI 15 pc))
	])
12609 For the frame note however, we try to be more explicit and actually
12610 show each register being stored into the stack frame, plus a (single)
12611 decrement of the stack pointer. We do it this way in order to be
12612 friendly to the stack unwinding code, which only wants to see a single
12613 stack decrement per instruction. The RTL we generate for the note looks
12614 something like this:
      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
	])

      This sequence is used both by the code to support stack unwinding for
      exception handlers and the code to generate dwarf2 frame debugging.  */
12627 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
12628 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
12629 dwarf_par_index = 1;
12631 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12633 if (mask & (1 << i))
12635 reg = gen_rtx_REG (SImode, i);
12637 XVECEXP (par, 0, 0)
12638 = gen_rtx_SET (VOIDmode,
12639 gen_frame_mem (BLKmode,
12640 gen_rtx_PRE_DEC (BLKmode,
12641 stack_pointer_rtx)),
12642 gen_rtx_UNSPEC (BLKmode,
12643 gen_rtvec (1, reg),
12644 UNSPEC_PUSH_MULT));
12646 if (i != PC_REGNUM)
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
	      dwarf_par_index++;
	    }

	  break;
	}
    }
12660 for (j = 1, i++; j < num_regs; i++)
12662 if (mask & (1 << i))
12664 reg = gen_rtx_REG (SImode, i);
12666 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
	  if (i != PC_REGNUM)
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem (SImode,
					      plus_constant (stack_pointer_rtx,
							     4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }
12684 par = emit_insn (par);
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
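/* Usage sketch (illustrative only): emit_multi_reg_push ((1 << 4)
   | (1 << 5) | (1 << LR_REGNUM)) emits a single push of {r4, r5, lr},
   while the attached REG_FRAME_RELATED_EXPR note describes one 12-byte
   stack decrement plus the three individual stores, in the shape of
   the RTL shown in the comment above.  */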
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
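/* For example (illustrative only), a function returning long long has a
   DImode return rtx, so this yields 8: both r0 and r1 carry parts of
   the return value and must not be reused as scratch registers by the
   epilogue code.  */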
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;
12719 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12720 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12722 reg = gen_rtx_REG (XFmode, base_reg++);
12724 XVECEXP (par, 0, 0)
12725 = gen_rtx_SET (VOIDmode,
12726 gen_frame_mem (BLKmode,
12727 gen_rtx_PRE_DEC (BLKmode,
12728 stack_pointer_rtx)),
12729 gen_rtx_UNSPEC (BLKmode,
12730 gen_rtvec (1, reg),
12731 UNSPEC_PUSH_MULT));
12732 tmp = gen_rtx_SET (VOIDmode,
12733 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
12734 RTX_FRAME_RELATED_P (tmp) = 1;
12735 XVECEXP (dwarf, 0, 1) = tmp;
12737 for (i = 1; i < count; i++)
12739 reg = gen_rtx_REG (XFmode, base_reg++);
12740 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (XFmode,
					plus_constant (stack_pointer_rtx,
						       i * 12)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }
  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -12 * count));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
12777 /* Compute the distance from register FROM to register TO.
12778 These can be the arg pointer (26), the soft frame pointer (25),
12779 the stack pointer (13) or the hard frame pointer (11).
12780 In thumb mode r7 is used as the soft frame pointer, if needed.
12781 Typical stack layout looks like this:
       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --
12811 For a given function some or all of these stack components
12812 may not be needed, giving rise to the possibility of
12813 eliminating some of the registers.
12815 The values returned by this function must reflect the behavior
12816 of arm_expand_prologue() and arm_compute_save_reg_mask().
12818 The sign of the number returned reflects the direction of stack
12819 growth, so the values are positive for all eliminations except
12820 from the soft frame pointer to the hard frame pointer.
   SFP may point just inside the local variables block to ensure correct
   alignment.  */
12826 /* Calculate stack offsets. These are used to calculate register elimination
12827 offsets and in prologue/epilogue code. Also calculates which registers
12828 should be saved. */
static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;
12843 /* We need to know if we are a leaf function. Unfortunately, it
12844 is possible to be called after start_sequence has been called,
12845 which causes get_insns to return the insns for the sequence,
12846 not the function, which will cause leaf_function_p to return
     the incorrect result.

     We work around this by caching the computed values: we only need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;
  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());
12860 leaf = leaf_function_p ();
12862 /* Space for variadic functions. */
12863 offsets->saved_args = crtl->args.pretend_args_size;
  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame = (offsets->saved_args
		    + (frame_pointer_needed ? 4 : 0)
		    + arm_compute_static_chain_stack_bytes ());

  if (TARGET_32BIT)
    {
      unsigned int regno;
12873 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12874 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12875 saved = core_saved;
12877 /* We know that SP will be doubleword aligned on entry, and we must
12878 preserve that condition at any subroutine call. We also require the
12879 soft frame pointer to be doubleword aligned. */
12881 if (TARGET_REALLY_IWMMXT)
12883 /* Check for the call-saved iWMMXt registers. */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}
12891 func_type = arm_current_func_type ();
12892 if (! IS_VOLATILE (func_type))
12894 /* Space for saved FPA registers. */
12895 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 12;
12899 /* Space for saved VFP registers. */
12900 if (TARGET_HARD_FLOAT && TARGET_VFP)
12901 saved += arm_get_vfp_saved_size ();
12904 else /* TARGET_THUMB1 */
12906 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12907 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12908 saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }
12913 /* Saved registers include the stack frame. */
  offsets->saved_regs = (offsets->saved_args + saved
			 + arm_compute_static_chain_stack_bytes ());
12916 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }
12926 /* Ensure SFP has the correct alignment. */
12927 if (ARM_DOUBLEWORD_ALIGN
12928 && (offsets->soft_frame & 7))
12930 offsets->soft_frame += 4;
12931 /* Try to align stack by pushing an extra reg. Don't bother doing this
12932 when there is a stack frame as the alignment will be rolled into
12933 the normal stack adjustment. */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (!crtl->tail_call_emit
	      && arm_size_return_regs () <= 12)
	    reg = 3;
	  else
	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
	      if ((offsets->saved_regs_mask & (1 << i)) == 0)
		{
		  reg = i;
		  break;
		}

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
12964 offsets->locals_base = offsets->soft_frame + frame_size;
12965 offsets->outgoing_args = (offsets->locals_base
12966 + crtl->outgoing_args_size);
12968 if (ARM_DOUBLEWORD_ALIGN)
12970 /* Ensure SP remains doubleword aligned. */
12971 if (offsets->outgoing_args & 7)
12972 offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
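/* Worked example (hypothetical, non-APCS Thumb-2 function): saving
   {r4, r5, lr} (12 bytes) with 16 bytes of locals, no pretend or
   outgoing args and no interworking slot gives saved_args = 0,
   saved_regs = 12, soft_frame = 12 rounded up to 16 for doubleword
   alignment, locals_base = 32 and outgoing_args = 32.  */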
12980 /* Calculate the relative offsets for the different stack pointers. Positive
12981 offsets are in the direction of stack growth. */
unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
12986 arm_stack_offsets *offsets;
12988 offsets = arm_get_frame_offsets ();
12990 /* OK, now we have enough information to compute the distances.
12991 There must be an entry in these switch tables for each pair
12992 of registers in ELIMINABLE_REGS, even if some of the entries
12993 seem to be redundant or useless. */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;
13002 case FRAME_POINTER_REGNUM:
13003 /* This is the reverse of the soft frame pointer
13004 to hard frame pointer elimination below. */
13005 return offsets->soft_frame - offsets->saved_args;
13007 case ARM_HARD_FRAME_POINTER_REGNUM:
13008 /* This is only non-zero in the case where the static chain register
13009 is stored above the frame. */
13010 return offsets->frame - offsets->saved_args - 4;
13012 case STACK_POINTER_REGNUM:
13013 /* If nothing has been pushed on the stack at all
13014 then this will return -4. This *is* correct! */
13015 return offsets->outgoing_args - (offsets->saved_args + 4);
	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;
13028 case ARM_HARD_FRAME_POINTER_REGNUM:
13029 /* The hard frame pointer points to the top entry in the
13030 stack frame. The soft frame pointer to the bottom entry
13031 in the stack frame. If there is no stack frame at all,
13032 then they are identical. */
13034 return offsets->frame - offsets->soft_frame;
13036 case STACK_POINTER_REGNUM:
13037 return offsets->outgoing_args - offsets->soft_frame;
	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
13045 /* You cannot eliminate from the stack pointer.
13046 In theory you could eliminate from the hard frame
13047 pointer to the stack pointer, but this will never
13048 happen, since if a stack frame is not needed the
13049 hard frame pointer will never be used. */
      gcc_unreachable ();
    }
}
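/* Worked example (illustrative only, reusing the hypothetical offsets
   above): eliminating ARG_POINTER_REGNUM to STACK_POINTER_REGNUM with
   outgoing_args = 32 and saved_args = 0 returns 32 - (0 + 4) = 28,
   the distance from the incoming argument area down to the final SP.  */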
13055 /* Emit RTL to save coprocessor registers on function entry. Returns the
13056 number of bytes pushed. */
static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;
13066 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13067 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13069 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
13070 insn = gen_rtx_MEM (V2SImode, insn);
13071 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }
  /* Save any floating point call-saved registers used by this
     function.  */
  if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13080 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13081 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13083 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
13084 insn = gen_rtx_MEM (XFmode, insn);
13085 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
	    RTX_FRAME_RELATED_P (insn) = 1;
	    saved_size += 12;
	  }
    }
  else
    {
      start_reg = LAST_FPA_REGNUM;
13094 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13096 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13098 if (start_reg - reg == 3)
13100 insn = emit_sfm (reg, 4);
		  RTX_FRAME_RELATED_P (insn) = 1;
		  saved_size += 48;

		  start_reg = reg - 1;
		}
	    }
	  else
	    {
	      if (start_reg != reg)
13110 insn = emit_sfm (reg + 1, start_reg - reg);
13111 RTX_FRAME_RELATED_P (insn) = 1;
13112 saved_size += (start_reg - reg) * 12;
13114 start_reg = reg - 1;
13118 if (start_reg != reg)
13120 insn = emit_sfm (reg + 1, start_reg - reg);
13121 saved_size += (start_reg - reg) * 12;
13122 RTX_FRAME_RELATED_P (insn) = 1;
13125 if (TARGET_HARD_FLOAT && TARGET_VFP)
13127 start_reg = FIRST_VFP_REGNUM;
13129 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13131 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13132 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13134 if (start_reg != reg)
13135 saved_size += vfp_emit_fstmd (start_reg,
13136 (reg - start_reg) / 2);
13137 start_reg = reg + 2;
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
13148 /* Set the Thumb frame pointer from the stack pointer. */
static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
13162 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
13173 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13174 hard_frame_pointer_rtx,
13175 stack_pointer_rtx));
13177 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
13178 plus_constant (stack_pointer_rtx, amount));
13179 RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
13198 int saved_regs = 0;
13199 unsigned HOST_WIDE_INT args_to_push;
13200 arm_stack_offsets *offsets;
13202 func_type = arm_current_func_type ();
  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;
13208 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
13209 args_to_push = crtl->args.pretend_args_size;
13211 /* Compute which register we will have to save onto the stack. */
13212 offsets = arm_get_frame_offsets ();
13213 live_regs_mask = offsets->saved_regs_mask;
13215 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
  if (IS_STACKALIGN (func_type))
    {
      rtx dwarf;
      rtx r0;
      rtx r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov r1, sp
	  mov sp, r0

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
13233 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
13235 r0 = gen_rtx_REG (SImode, 0);
13236 r1 = gen_rtx_REG (SImode, 1);
13237 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
13238 compiler won't choke. */
13239 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
13240 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
13241 insn = gen_movsi (r0, stack_pointer_rtx);
13242 RTX_FRAME_RELATED_P (insn) = 1;
13243 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13245 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
13246 emit_insn (gen_movsi (stack_pointer_rtx, r1));
  /* For APCS frames, if the IP register is clobbered
     when creating the frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
13254 if (IS_INTERRUPT (func_type))
13256 /* Interrupt functions must not corrupt any registers.
13257 Creating a frame pointer however, corrupts the IP
13258 register, so we must push it first. */
13259 insn = emit_multi_reg_push (1 << IP_REGNUM);
13261 /* Do not set RTX_FRAME_RELATED_P on this insn.
13262 The dwarf stack unwinding code only wants to see one
13263 stack decrement per function, and this is not it. If
13264 this instruction is labeled as being part of the frame
13265 creation sequence then dwarf2out_frame_debug_expr will
13266 die when it encounters the assignment of IP to FP
13267 later on, since the use of SP here establishes SP as
13268 the CFA register and not IP.
13270 Anyway this instruction is not really part of the stack
13271 frame creation although it is part of the prologue. */
13273 else if (IS_NESTED (func_type))
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this, we need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register.
	       2. A slot on the stack above the frame.  (This only
		  works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.
13287 Note - we only need to tell the dwarf2 backend about the SP
13288 adjustment in the second variant; the static chain register
13289 doesn't need to be unwound, as it doesn't contain a value
13290 inherited from the caller. */
13292 if (df_regs_ever_live_p (3) == false)
13293 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx dwarf;

	      gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
	      saved_regs += 4;

	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		insn = emit_multi_reg_push
		  ((0xf0 >> (args_to_push / 4)) & 0xf);
	      else
		insn = emit_insn
		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (- args_to_push)));

	      RTX_FRAME_RELATED_P (insn) = 1;

	      saved_pretend_args = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;

	      /* Now reuse r3 to preserve IP.  */
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	    }
13334 insn = emit_set_insn (ip_rtx,
13335 plus_constant (stack_pointer_rtx, fp_offset));
13336 RTX_FRAME_RELATED_P (insn) = 1;
  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra
     push of IP (needed when a frame is needed and the frame layout
     uses APCS), then subtracting four from LR now will mean that
     the function return can be done with a single instruction.  */
13357 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
13358 && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));
    }
13367 if (live_regs_mask)
13369 saved_regs += bit_count (live_regs_mask) * 4;
13370 if (optimize_size && !frame_pointer_needed
13371 && saved_regs == offsets->saved_regs - offsets->saved_args)
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}
13390 insn = emit_multi_reg_push (live_regs_mask);
13391 RTX_FRAME_RELATED_P (insn) = 1;
13394 if (! IS_VOLATILE (func_type))
13395 saved_regs += arm_save_coproc_regs ();
13397 if (frame_pointer_needed && TARGET_ARM)
13399 /* Create the new frame pointer. */
13400 if (TARGET_APCS_FRAME)
13402 insn = GEN_INT (-(4 + args_to_push + fp_offset));
13403 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
13404 RTX_FRAME_RELATED_P (insn) = 1;
13406 if (IS_NESTED (func_type))
13408 /* Recover the static chain register. */
13409 if (!df_regs_ever_live_p (3)
13410 || saved_pretend_args)
13411 insn = gen_rtx_REG (SImode, 3);
13412 else /* if (crtl->args.pretend_args_size == 0) */
13414 insn = plus_constant (hard_frame_pointer_rtx, 4);
13415 insn = gen_frame_mem (SImode, insn);
13417 emit_set_insn (ip_rtx, insn);
13418 /* Add a USE to stop propagate_one_insn() from barfing. */
13419 emit_insn (gen_prologue_use (ip_rtx));
13424 insn = GEN_INT (saved_regs - 4);
13425 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13426 stack_pointer_rtx, insn));
13427 RTX_FRAME_RELATED_P (insn) = 1;
13431 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
13433 /* This add can produce multiple insns for a large constant, so we
13434 need to get tricky. */
13435 rtx last = get_last_insn ();
13437 amount = GEN_INT (offsets->saved_args + saved_regs
13438 - offsets->outgoing_args);
      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);
13449 /* If the frame pointer is needed, emit a special barrier that
13450 will prevent the scheduler from moving stores to the frame
13451 before the stack adjustment. */
13452 if (frame_pointer_needed)
13453 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
13454 hard_frame_pointer_rtx));
13458 if (frame_pointer_needed && TARGET_THUMB2)
13459 thumb_set_frame_pointer (offsets);
13461 if (flag_pic && arm_pic_register != INVALID_REGNUM)
13463 unsigned long mask;
13465 mask = live_regs_mask;
13466 mask &= THUMB2_WORK_REGS;
13467 if (!IS_NESTED (func_type))
13468 mask |= (1 << IP_REGNUM);
13469 arm_load_pic_register (mask);
  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prologue.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
13477 if (crtl->profile || !TARGET_SCHED_PROLOG
13478 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
13479 emit_insn (gen_blockage ());
13481 /* If the link register is being kept alive, with the return address in it,
13482 then make sure that it does not get reused by the ce2 pass. */
13483 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
13484 cfun->machine->lr_save_eliminated = 1;
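/* Illustrative prologue (a sketch, assuming an ARM-mode APCS-frame
   nested-capable function with no pretend args): the code above
   typically expands to the classic sequence

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

   matching the mask computed by arm_compute_save_reg_mask and the IP
   handling documented in this function.  */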
13487 /* Print condition code to STREAM. Helper function for arm_print_operand. */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
13508 else if (current_insn_predicate)
      enum arm_cond_code code;

      if (TARGET_THUMB && !arm_arch_thumb2)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;
    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc ('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc ('s', stream);
	}
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc ('s', stream);
      break;
    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text
       consists of further digits which we don't want to be part of the
       operand number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;

	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = REAL_VALUE_NEGATE (r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	default:
	  gcc_unreachable ();
	}
      return;

    case 'B':
      if (GET_CODE (x) == CONST_INT)
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    /* Truncate Cirrus shift counts.  */
    case 's':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
	  return;
	}
      arm_print_operand (stream, x, 0);
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	if (!shift_operator (x, SImode))
	  {
	    output_operand_lossage ("invalid shift operand");
	    break;
	  }

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
13689 /* An explanation of the 'Q', 'R' and 'H' register operands:
13691 In a pair of registers containing a DI or DF value the 'Q'
13692 operand returns the register number of the register containing
13693 the least significant part of the value. The 'R' operand returns
13694 the register number of the register containing the most
13695 significant part of the value.
13697 The 'H' operand returns the higher of the two register numbers.
13698 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
13699 same as the 'Q' operand, since the most significant part of the
13700 value is held in the lower number register. The reverse is true
13701 on systems where WORDS_BIG_ENDIAN is false.
13703 The purpose of these operands is to distinguish between cases
13704 where the endian-ness of the values is important (for example
13705 when they are added together), and cases where the endian-ness
13706 is irrelevant, but the order of register operations is important.
13707 For example when loading a value from memory into a register
13708 pair, the endian-ness does not matter. Provided that the value
13709 from the lower memory address is put into the lower numbered
13710 register, and the value from the higher address is put into the
13711 higher numbered register, the load will work regardless of whether
13712 the value being loaded is big-wordian or little-wordian. The
13713 order of the two register loads can matter however, if the address
13714 of the memory location is actually held in one of the registers
13715 being overwritten by the load. */
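/* A worked example (illustrative only): for a DImode value held in the
   pair r0/r1 on a little-wordian target (WORDS_BIG_ENDIAN false), '%Q'
   prints r0 (least significant word), '%R' prints r1 (most significant
   word), and '%H' prints r1, the higher-numbered register.  On a
   big-wordian target '%Q' and '%H' both print r1, while '%R' prints r0.  */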
13717 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13719 output_operand_lossage ("invalid operand for code '%c'", code);
13723 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
13727 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13729 output_operand_lossage ("invalid operand for code '%c'", code);
13733 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
13737 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13739 output_operand_lossage ("invalid operand for code '%c'", code);
13743 asm_fprintf (stream, "%r", REGNO (x) + 1);
13747 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13749 output_operand_lossage ("invalid operand for code '%c'", code);
13753 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
13757 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13759 output_operand_lossage ("invalid operand for code '%c'", code);
13763 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13767 asm_fprintf (stream, "%r",
13768 GET_CODE (XEXP (x, 0)) == REG
13769 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13773 asm_fprintf (stream, "{%r-%r}",
13775 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
13778 /* Like 'M', but writing doubleword vector registers, for use by Neon
insns. */
13782 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13783 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13785 asm_fprintf (stream, "{d%d}", regno);
13787 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13792 /* CONST_TRUE_RTX means always -- that's the default. */
13793 if (x == const_true_rtx)
13796 if (!COMPARISON_P (x))
13798 output_operand_lossage ("invalid operand for code '%c'", code);
13802 fputs (arm_condition_codes[get_arm_condition_code (x)],
13807 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13808 want to do that. */
13809 if (x == const_true_rtx)
13811 output_operand_lossage ("instruction never executed");
13814 if (!COMPARISON_P (x))
13816 output_operand_lossage ("invalid operand for code '%c'", code);
13820 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13821 (get_arm_condition_code (x))],
13825 /* Cirrus registers can be accessed in a variety of ways:
13826 single floating point (f)
13827 double floating point (d)
13829 64bit integer (dx). */
13830 case 'W': /* Cirrus register in F mode. */
13831 case 'X': /* Cirrus register in D mode. */
13832 case 'Y': /* Cirrus register in FX mode. */
13833 case 'Z': /* Cirrus register in DX mode. */
13834 gcc_assert (GET_CODE (x) == REG
13835 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13837 fprintf (stream, "mv%s%s",
13838 code == 'W' ? "f"
13839 : code == 'X' ? "d"
13840 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13844 /* Print cirrus register in the mode specified by the register's mode. */
13847 int mode = GET_MODE (x);
13849 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13851 output_operand_lossage ("invalid operand for code '%c'", code);
13855 fprintf (stream, "mv%s%s",
13856 mode == DFmode ? "d"
13857 : mode == SImode ? "fx"
13858 : mode == DImode ? "dx"
13859 : "f", reg_names[REGNO (x)] + 2);
13865 if (GET_CODE (x) != REG
13866 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13867 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13868 /* Bad value for wCG register number. */
13870 output_operand_lossage ("invalid operand for code '%c'", code);
13875 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13878 /* Print an iWMMXt control register name. */
13880 if (GET_CODE (x) != CONST_INT
13882 || INTVAL (x) >= 16)
13883 /* Bad value for wC register number. */
13885 output_operand_lossage ("invalid operand for code '%c'", code);
13891 static const char * wc_reg_names [16] =
13893 "wCID", "wCon", "wCSSF", "wCASF",
13894 "wC4", "wC5", "wC6", "wC7",
13895 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13896 "wC12", "wC13", "wC14", "wC15"
13899 fprintf (stream, wc_reg_names [INTVAL (x)]);
13903 /* Print a VFP/Neon double precision or quad precision register name. */
13907 int mode = GET_MODE (x);
13908 int is_quad = (code == 'q');
13911 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13913 output_operand_lossage ("invalid operand for code '%c'", code);
13917 if (GET_CODE (x) != REG
13918 || !IS_VFP_REGNUM (REGNO (x)))
13920 output_operand_lossage ("invalid operand for code '%c'", code);
13925 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13926 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13928 output_operand_lossage ("invalid operand for code '%c'", code);
13932 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13933 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13937 /* These two codes print the low/high doubleword register of a Neon quad
13938 register, respectively. For pair-structure types, can also print
13939 low/high quadword registers. */
13943 int mode = GET_MODE (x);
13946 if ((GET_MODE_SIZE (mode) != 16
13947 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13949 output_operand_lossage ("invalid operand for code '%c'", code);
13954 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13956 output_operand_lossage ("invalid operand for code '%c'", code);
13960 if (GET_MODE_SIZE (mode) == 16)
13961 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13962 + (code == 'f' ? 1 : 0));
13964 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13965 + (code == 'f' ? 1 : 0));
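/* For instance (illustrative): if x is the quad register q3 in a
   16-byte mode, '%e' prints d6 and '%f' prints d7, the low and high
   doubleword halves of q3.  */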
13969 /* Print a VFPv3 floating-point constant, represented as an integer
index. */
13973 int index = vfp3_const_double_index (x);
13974 gcc_assert (index != -1);
13975 fprintf (stream, "%d", index);
13979 /* Print bits representing opcode features for Neon.
13981 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13982 and polynomials as unsigned.
13984 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13986 Bit 2 is 1 for rounding functions, 0 otherwise. */
13988 /* Identify the type as 's', 'u', 'p' or 'f'. */
13991 HOST_WIDE_INT bits = INTVAL (x);
13992 fputc ("uspf"[bits & 3], stream);
13996 /* Likewise, but signed and unsigned integers are both 'i'. */
13999 HOST_WIDE_INT bits = INTVAL (x);
14000 fputc ("iipf"[bits & 3], stream);
14004 /* As for 'T', but emit 'u' instead of 'p'. */
14007 HOST_WIDE_INT bits = INTVAL (x);
14008 fputc ("usuf"[bits & 3], stream);
14012 /* Bit 2: rounding (vs none). */
14015 HOST_WIDE_INT bits = INTVAL (x);
14016 fputs ((bits & 4) != 0 ? "r" : "", stream);
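/* Worked examples (assumed bit values): bits 0b011 describe a float, so
   '%T' and '%F' both print 'f' and '%O' prints nothing; bits 0b110
   describe a polynomial with rounding, so '%T' prints 'p', '%t' prints
   'u', and '%O' prints 'r'.  */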
14020 /* Memory operand for vld1/vst1 instruction. */
14024 bool postinc = FALSE;
14025 gcc_assert (GET_CODE (x) == MEM);
14026 addr = XEXP (x, 0);
14027 if (GET_CODE (addr) == POST_INC)
14030 addr = XEXP (addr, 0);
14032 asm_fprintf (stream, "[%r]", REGNO (addr));
14034 fputs("!", stream);
14038 /* Register specifier for vld1.16/vst1.16. Translate the S register
14039 number into a D register number and element index. */
14042 int mode = GET_MODE (x);
14045 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
14047 output_operand_lossage ("invalid operand for code '%c'", code);
14052 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
14054 output_operand_lossage ("invalid operand for code '%c'", code);
14058 regno = regno - FIRST_VFP_REGNUM;
14059 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
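/* E.g. (illustrative): the S register s1 is printed as d0[2], since an
   odd-numbered S register occupies halfword lanes 2-3 of its containing
   D register and the code above always selects the low lane of that
   half.  */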
14066 output_operand_lossage ("missing operand");
14070 switch (GET_CODE (x))
14073 asm_fprintf (stream, "%r", REGNO (x));
14077 output_memory_reference_mode = GET_MODE (x);
14078 output_address (XEXP (x, 0));
14085 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
14086 sizeof (fpstr), 0, 1);
14087 fprintf (stream, "#%s", fpstr);
14090 fprintf (stream, "#%s", fp_immediate_constant (x));
14094 gcc_assert (GET_CODE (x) != NEG);
14095 fputc ('#', stream);
14096 output_addr_const (stream, x);
14102 /* Target hook for assembling integer objects. The ARM version needs to
14103 handle word-sized values specially. */
14105 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
14107 enum machine_mode mode;
14109 if (size == UNITS_PER_WORD && aligned_p)
14111 fputs ("\t.word\t", asm_out_file);
14112 output_addr_const (asm_out_file, x);
14114 /* Mark symbols as position independent. We only do this in the
14115 .text segment, not in the .data segment. */
14116 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
14117 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
14119 /* See legitimize_pic_address for an explanation of the
14120 TARGET_VXWORKS_RTP check. */
14121 if (TARGET_VXWORKS_RTP
14122 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
14123 fputs ("(GOT)", asm_out_file);
14125 fputs ("(GOTOFF)", asm_out_file);
14127 fputc ('\n', asm_out_file);
14131 mode = GET_MODE (x);
14133 if (arm_vector_mode_supported_p (mode))
14137 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14139 units = CONST_VECTOR_NUNITS (x);
14140 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
14142 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14143 for (i = 0; i < units; i++)
14145 rtx elt = CONST_VECTOR_ELT (x, i);
14146 assemble_integer
14147 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
14150 for (i = 0; i < units; i++)
14152 rtx elt = CONST_VECTOR_ELT (x, i);
14153 REAL_VALUE_TYPE rval;
14155 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
14157 assemble_real
14158 (rval, GET_MODE_INNER (mode),
14159 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
14165 return default_assemble_integer (x, size, aligned_p);
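/* A sketch of the .word output above (hand-written, not captured from a
   real build): in a PIC constant table a symbol not known to be local is
   emitted as "\t.word\tfoo(GOT)" and a local one as "\t.word\tbar(GOTOFF)",
   where `foo' and `bar' are hypothetical symbol names.  */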
14169 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
14173 if (!TARGET_AAPCS_BASED)
14176 default_named_section_asm_out_constructor
14177 : default_named_section_asm_out_destructor) (symbol, priority);
14181 /* Put these in the .init_array section, using a special relocation. */
14182 if (priority != DEFAULT_INIT_PRIORITY)
14185 sprintf (buf, "%s.%.5u",
14186 is_ctor ? ".init_array" : ".fini_array",
14188 s = get_section (buf, SECTION_WRITE, NULL_TREE);
14195 switch_to_section (s);
14196 assemble_align (POINTER_SIZE);
14197 fputs ("\t.word\t", asm_out_file);
14198 output_addr_const (asm_out_file, symbol);
14199 fputs ("(target1)\n", asm_out_file);
14202 /* Add a function to the list of static constructors. */
14205 arm_elf_asm_constructor (rtx symbol, int priority)
14207 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
14210 /* Add a function to the list of static destructors. */
14213 arm_elf_asm_destructor (rtx symbol, int priority)
14215 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
14218 /* A finite state machine takes care of noticing whether or not instructions
14219 can be conditionally executed, thus decreasing execution time and code
14220 size by deleting branch instructions. The fsm is controlled by
14221 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
14223 /* The states of the fsm controlling condition codes are:
14224 0: normal, do nothing special
14225 1: make ASM_OUTPUT_OPCODE not output this instruction
14226 2: make ASM_OUTPUT_OPCODE not output this instruction
14227 3: make instructions conditional
14228 4: make instructions conditional
14230 State transitions (state->state by whom under condition):
14231 0 -> 1 final_prescan_insn if the `target' is a label
14232 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
14233 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
14234 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
14235 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
14236 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
14237 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
14238 (the target insn is arm_target_insn).
14240 If the jump clobbers the conditions then we use states 2 and 4.
14242 A similar thing can be done with conditional return insns.
14244 XXX In case the `target' is an unconditional branch, this conditionalising
14245 of the instructions always reduces code size, but not always execution
14246 time. But then, I want to reduce the code size to somewhere near what
14247 /bin/cc produces. */
14249 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
14250 instructions. When a COND_EXEC instruction is seen the subsequent
14251 instructions are scanned so that multiple conditional instructions can be
14252 combined into a single IT block. arm_condexec_count and arm_condexec_mask
14253 specify the length and true/false mask for the IT block. These will be
14254 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
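/* As an illustration (hand-written, not compiler output), the fsm
   replaces a conditional branch over a single instruction

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   with one conditionally executed instruction:

	cmp	r0, #0
	addne	r1, r1, #1  */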
14256 /* Returns the index of the ARM condition code string in
14257 `arm_condition_codes'. COMPARISON should be an rtx like
14258 `(eq (...) (...))'. */
14259 static enum arm_cond_code
14260 get_arm_condition_code (rtx comparison)
14262 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
14263 enum arm_cond_code code;
14264 enum rtx_code comp_code = GET_CODE (comparison);
14266 if (GET_MODE_CLASS (mode) != MODE_CC)
14267 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
14268 XEXP (comparison, 1));
14272 case CC_DNEmode: code = ARM_NE; goto dominance;
14273 case CC_DEQmode: code = ARM_EQ; goto dominance;
14274 case CC_DGEmode: code = ARM_GE; goto dominance;
14275 case CC_DGTmode: code = ARM_GT; goto dominance;
14276 case CC_DLEmode: code = ARM_LE; goto dominance;
14277 case CC_DLTmode: code = ARM_LT; goto dominance;
14278 case CC_DGEUmode: code = ARM_CS; goto dominance;
14279 case CC_DGTUmode: code = ARM_HI; goto dominance;
14280 case CC_DLEUmode: code = ARM_LS; goto dominance;
14281 case CC_DLTUmode: code = ARM_CC;
14284 gcc_assert (comp_code == EQ || comp_code == NE);
14286 if (comp_code == EQ)
14287 return ARM_INVERSE_CONDITION_CODE (code);
14293 case NE: return ARM_NE;
14294 case EQ: return ARM_EQ;
14295 case GE: return ARM_PL;
14296 case LT: return ARM_MI;
14297 default: gcc_unreachable ();
14303 case NE: return ARM_NE;
14304 case EQ: return ARM_EQ;
14305 default: gcc_unreachable ();
14311 case NE: return ARM_MI;
14312 case EQ: return ARM_PL;
14313 default: gcc_unreachable ();
14318 /* These encodings assume that AC=1 in the FPA system control
14319 byte. This allows us to handle all cases except UNEQ and
LTGT. */
14323 case GE: return ARM_GE;
14324 case GT: return ARM_GT;
14325 case LE: return ARM_LS;
14326 case LT: return ARM_MI;
14327 case NE: return ARM_NE;
14328 case EQ: return ARM_EQ;
14329 case ORDERED: return ARM_VC;
14330 case UNORDERED: return ARM_VS;
14331 case UNLT: return ARM_LT;
14332 case UNLE: return ARM_LE;
14333 case UNGT: return ARM_HI;
14334 case UNGE: return ARM_PL;
14335 /* UNEQ and LTGT do not have a representation. */
14336 case UNEQ: /* Fall through. */
14337 case LTGT: /* Fall through. */
14338 default: gcc_unreachable ();
14344 case NE: return ARM_NE;
14345 case EQ: return ARM_EQ;
14346 case GE: return ARM_LE;
14347 case GT: return ARM_LT;
14348 case LE: return ARM_GE;
14349 case LT: return ARM_GT;
14350 case GEU: return ARM_LS;
14351 case GTU: return ARM_CC;
14352 case LEU: return ARM_CS;
14353 case LTU: return ARM_HI;
14354 default: gcc_unreachable ();
14360 case LTU: return ARM_CS;
14361 case GEU: return ARM_CC;
14362 default: gcc_unreachable ();
14368 case NE: return ARM_NE;
14369 case EQ: return ARM_EQ;
14370 case GE: return ARM_GE;
14371 case GT: return ARM_GT;
14372 case LE: return ARM_LE;
14373 case LT: return ARM_LT;
14374 case GEU: return ARM_CS;
14375 case GTU: return ARM_HI;
14376 case LEU: return ARM_LS;
14377 case LTU: return ARM_CC;
14378 default: gcc_unreachable ();
14381 default: gcc_unreachable ();
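/* For example (illustrative): given the rtx (ge (reg:CC CC_REGNUM)
   (const_int 0)), this function returns ARM_GE, and
   arm_condition_codes[ARM_GE] supplies the "ge" suffix printed on
   conditional instructions.  */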
14385 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
instructions. */
14388 thumb2_final_prescan_insn (rtx insn)
14390 rtx first_insn = insn;
14391 rtx body = PATTERN (insn);
14393 enum arm_cond_code code;
14397 /* Remove the previous insn from the count of insns to be output. */
14398 if (arm_condexec_count)
14399 arm_condexec_count--;
14401 /* Nothing to do if we are already inside a conditional block. */
14402 if (arm_condexec_count)
14405 if (GET_CODE (body) != COND_EXEC)
14408 /* Conditional jumps are implemented directly. */
14409 if (GET_CODE (insn) == JUMP_INSN)
14412 predicate = COND_EXEC_TEST (body);
14413 arm_current_cc = get_arm_condition_code (predicate);
14415 n = get_attr_ce_count (insn);
14416 arm_condexec_count = 1;
14417 arm_condexec_mask = (1 << n) - 1;
14418 arm_condexec_masklen = n;
14419 /* See if subsequent instructions can be combined into the same block. */
14422 insn = next_nonnote_insn (insn);
14424 /* Jumping into the middle of an IT block is illegal, so a label or
14425 barrier terminates the block. */
14426 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
14429 body = PATTERN (insn);
14430 /* USE and CLOBBER aren't really insns, so just skip them. */
14431 if (GET_CODE (body) == USE
14432 || GET_CODE (body) == CLOBBER)
14435 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
14436 if (GET_CODE (body) != COND_EXEC)
14438 /* Allow up to 4 conditionally executed instructions in a block. */
14439 n = get_attr_ce_count (insn);
14440 if (arm_condexec_masklen + n > 4)
14443 predicate = COND_EXEC_TEST (body);
14444 code = get_arm_condition_code (predicate);
14445 mask = (1 << n) - 1;
14446 if (arm_current_cc == code)
14447 arm_condexec_mask |= (mask << arm_condexec_masklen);
14448 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
14451 arm_condexec_count++;
14452 arm_condexec_masklen += n;
14454 /* A jump must be the last instruction in a conditional block. */
14455 if (GET_CODE(insn) == JUMP_INSN)
14458 /* Restore recog_data (getting the attributes of other insns can
14459 destroy this array, but final.c assumes that it remains intact
14460 across this call). */
14461 extract_constrain_insn_cached (first_insn);
14465 arm_final_prescan_insn (rtx insn)
14467 /* BODY will hold the body of INSN. */
14468 rtx body = PATTERN (insn);
14470 /* This will be 1 if trying to repeat the trick, and things need to be
14471 reversed if it appears to fail. */
14474 /* A nonzero JUMP_CLOBBERS implies that the condition codes are clobbered
14475 if a branch is taken, even if the rtl suggests otherwise. It also
14476 means that we have to grub around within the jump expression to find
14477 out what the conditions are when the jump isn't taken. */
14478 int jump_clobbers = 0;
14480 /* If we start with a return insn, we only succeed if we find another one. */
14481 int seeking_return = 0;
14483 /* START_INSN will hold the insn from where we start looking. This is the
14484 first insn after the following code_label if REVERSE is true. */
14485 rtx start_insn = insn;
14487 /* If in state 4, check if the target branch is reached, in order to
14488 change back to state 0. */
14489 if (arm_ccfsm_state == 4)
14491 if (insn == arm_target_insn)
14493 arm_target_insn = NULL;
14494 arm_ccfsm_state = 0;
14499 /* If in state 3, it is possible to repeat the trick, if this insn is an
14500 unconditional branch to a label, and immediately following this branch
14501 is the previous target label which is only used once, and the label this
14502 branch jumps to is not too far off. */
14503 if (arm_ccfsm_state == 3)
14505 if (simplejump_p (insn))
14507 start_insn = next_nonnote_insn (start_insn);
14508 if (GET_CODE (start_insn) == BARRIER)
14510 /* XXX Isn't this always a barrier? */
14511 start_insn = next_nonnote_insn (start_insn);
14513 if (GET_CODE (start_insn) == CODE_LABEL
14514 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14515 && LABEL_NUSES (start_insn) == 1)
14520 else if (GET_CODE (body) == RETURN)
14522 start_insn = next_nonnote_insn (start_insn);
14523 if (GET_CODE (start_insn) == BARRIER)
14524 start_insn = next_nonnote_insn (start_insn);
14525 if (GET_CODE (start_insn) == CODE_LABEL
14526 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14527 && LABEL_NUSES (start_insn) == 1)
14530 seeking_return = 1;
14539 gcc_assert (!arm_ccfsm_state || reverse);
14540 if (GET_CODE (insn) != JUMP_INSN)
14543 /* This jump might be paralleled with a clobber of the condition codes;
14544 the jump should always come first. */
14545 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
14546 body = XVECEXP (body, 0, 0);
14549 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
14550 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
14553 int fail = FALSE, succeed = FALSE;
14554 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
14555 int then_not_else = TRUE;
14556 rtx this_insn = start_insn, label = 0;
14558 /* If the jump cannot be done with one instruction, we cannot
14559 conditionally execute the instruction in the inverse case. */
14560 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
14566 /* Register the insn jumped to. */
14569 if (!seeking_return)
14570 label = XEXP (SET_SRC (body), 0);
14572 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
14573 label = XEXP (XEXP (SET_SRC (body), 1), 0);
14574 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
14576 label = XEXP (XEXP (SET_SRC (body), 2), 0);
14577 then_not_else = FALSE;
14579 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
14580 seeking_return = 1;
14581 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
14583 seeking_return = 1;
14584 then_not_else = FALSE;
14587 gcc_unreachable ();
14589 /* See how many insns this branch skips, and what kind of insns. If all
14590 insns are okay, and the label or unconditional branch to the same
14591 label is not too far away, succeed. */
14592 for (insns_skipped = 0;
14593 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
14597 this_insn = next_nonnote_insn (this_insn);
14601 switch (GET_CODE (this_insn))
14604 /* Succeed if it is the target label, otherwise fail since
14605 control falls in from somewhere else. */
14606 if (this_insn == label)
14610 arm_ccfsm_state = 2;
14611 this_insn = next_nonnote_insn (this_insn);
14614 arm_ccfsm_state = 1;
14622 /* Succeed if the following insn is the target label. Otherwise fail.
14624 If return insns are used then the last insn in a function
14625 will be a barrier. */
14626 this_insn = next_nonnote_insn (this_insn);
14627 if (this_insn && this_insn == label)
14631 arm_ccfsm_state = 2;
14632 this_insn = next_nonnote_insn (this_insn);
14635 arm_ccfsm_state = 1;
14643 /* The AAPCS says that conditional calls should not be
14644 used since they make interworking inefficient (the
14645 linker can't transform BL<cond> into BLX). That's
14646 only a problem if the machine has BLX. */
14653 /* Succeed if the following insn is the target label, or
14654 if the following two insns are a barrier and the target label. */
14656 this_insn = next_nonnote_insn (this_insn);
14657 if (this_insn && GET_CODE (this_insn) == BARRIER)
14658 this_insn = next_nonnote_insn (this_insn);
14660 if (this_insn && this_insn == label
14661 && insns_skipped < max_insns_skipped)
14665 arm_ccfsm_state = 2;
14666 this_insn = next_nonnote_insn (this_insn);
14669 arm_ccfsm_state = 1;
14677 /* If this is an unconditional branch to the same label, succeed.
14678 If it is to another label, do nothing. If it is conditional, fail. */
14680 /* XXX Probably, the tests for SET and the PC are unnecessary. */
14683 scanbody = PATTERN (this_insn);
14684 if (GET_CODE (scanbody) == SET
14685 && GET_CODE (SET_DEST (scanbody)) == PC)
14687 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
14688 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
14690 arm_ccfsm_state = 2;
14693 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
14696 /* Fail if a conditional return is undesirable (e.g. on a
14697 StrongARM), but still allow this if optimizing for size. */
14698 else if (GET_CODE (scanbody) == RETURN
14699 && !use_return_insn (TRUE, NULL)
14702 else if (GET_CODE (scanbody) == RETURN
14705 arm_ccfsm_state = 2;
14708 else if (GET_CODE (scanbody) == PARALLEL)
14710 switch (get_attr_conds (this_insn))
14720 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
14725 /* Instructions using or affecting the condition codes make it fail. */
14727 scanbody = PATTERN (this_insn);
14728 if (!(GET_CODE (scanbody) == SET
14729 || GET_CODE (scanbody) == PARALLEL)
14730 || get_attr_conds (this_insn) != CONDS_NOCOND)
14733 /* A conditional Cirrus instruction must be followed by
14734 a non-Cirrus instruction. However, since we
14735 conditionalize instructions in this function and by
14736 the time we get here we can't add instructions
14737 (nops), because shorten_branches() has already been
14738 called, we will disable conditionalizing Cirrus
14739 instructions to be safe. */
14740 if (GET_CODE (scanbody) != USE
14741 && GET_CODE (scanbody) != CLOBBER
14742 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
14752 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
14753 arm_target_label = CODE_LABEL_NUMBER (label);
14756 gcc_assert (seeking_return || arm_ccfsm_state == 2);
14758 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
14760 this_insn = next_nonnote_insn (this_insn);
14761 gcc_assert (!this_insn
14762 || (GET_CODE (this_insn) != BARRIER
14763 && GET_CODE (this_insn) != CODE_LABEL));
14767 /* Oh, dear! We ran off the end... give up. */
14768 extract_constrain_insn_cached (insn);
14769 arm_ccfsm_state = 0;
14770 arm_target_insn = NULL;
14773 arm_target_insn = this_insn;
14777 gcc_assert (!reverse);
14778 arm_current_cc =
14779 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
14780 0), 0), 1));
14781 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
14782 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14783 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
14784 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14788 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
14789 what it was. */
14790 if (!reverse)
14791 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
14795 if (reverse || then_not_else)
14796 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14799 /* Restore recog_data (getting the attributes of other insns can
14800 destroy this array, but final.c assumes that it remains intact
14801 across this call). */
14802 extract_constrain_insn_cached (insn);
14806 /* Output IT instructions. */
14808 thumb2_asm_output_opcode (FILE * stream)
14813 if (arm_condexec_mask)
14815 for (n = 0; n < arm_condexec_masklen; n++)
14816 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14818 asm_fprintf(stream, "i%s\t%s\n\t", buff,
14819 arm_condition_codes[arm_current_cc]);
14820 arm_condexec_mask = 0;
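/* A worked example (assumed values): with arm_current_cc == ARM_EQ,
   arm_condexec_mask == 0x5 and arm_condexec_masklen == 3, the loop
   builds "tet" and the function emits

	itet	eq

   so the first and third instructions of the block execute if EQ and
   the second if NE.  */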
14824 /* Returns true if REGNO is a valid register
14825 for holding a quantity of type MODE. */
14827 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14829 if (GET_MODE_CLASS (mode) == MODE_CC)
14830 return (regno == CC_REGNUM
14831 || (TARGET_HARD_FLOAT && TARGET_VFP
14832 && regno == VFPCC_REGNUM));
14835 /* For the Thumb we only allow values bigger than SImode in
14836 registers 0 - 6, so that there is always a second low
14837 register available to hold the upper part of the value.
14838 We probably ought to ensure that the register is the
14839 start of an even numbered register pair. */
14840 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14842 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14843 && IS_CIRRUS_REGNUM (regno))
14844 /* We have outlawed SI values in Cirrus registers because they
14845 reside in the lower 32 bits, but SF values reside in the
14846 upper 32 bits. This causes gcc all sorts of grief. We can't
14847 even split the registers into pairs because Cirrus SI values
14848 get sign-extended to 64 bits -- aldyh. */
14849 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14851 if (TARGET_HARD_FLOAT && TARGET_VFP
14852 && IS_VFP_REGNUM (regno))
14854 if (mode == SFmode || mode == SImode)
14855 return VFP_REGNO_OK_FOR_SINGLE (regno);
14857 if (mode == DFmode)
14858 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14860 /* VFP registers can hold HFmode values, but there is no point in
14861 putting them there unless we have the NEON extensions for
14862 loading/storing them, too. */
14863 if (mode == HFmode)
14864 return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
14867 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14868 || (VALID_NEON_QREG_MODE (mode)
14869 && NEON_REGNO_OK_FOR_QUAD (regno))
14870 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14871 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14872 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14873 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14874 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14879 if (TARGET_REALLY_IWMMXT)
14881 if (IS_IWMMXT_GR_REGNUM (regno))
14882 return mode == SImode;
14884 if (IS_IWMMXT_REGNUM (regno))
14885 return VALID_IWMMXT_REG_MODE (mode);
14888 /* We allow almost any value to be stored in the general registers.
14889 Restrict doubleword quantities to even register pairs so that we can
14890 use ldrd. Do not allow very large Neon structure opaque modes in
14891 general registers; they would use too many. */
14892 if (regno <= LAST_ARM_REGNUM)
14893 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14894 && ARM_NUM_REGS (mode) <= 4;
14896 if (regno == FRAME_POINTER_REGNUM
14897 || regno == ARG_POINTER_REGNUM)
14898 /* We only allow integers in the fake hard registers. */
14899 return GET_MODE_CLASS (mode) == MODE_INT;
14901 /* The only registers left are the FPA registers
14902 which we only allow to hold FP values. */
14903 return (TARGET_HARD_FLOAT && TARGET_FPA
14904 && GET_MODE_CLASS (mode) == MODE_FLOAT
14905 && regno >= FIRST_FPA_REGNUM
14906 && regno <= LAST_FPA_REGNUM);
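/* Illustrative consequences of the rules above: with TARGET_LDRD, a
   DImode value may live in the even-numbered pair r2/r3 but may not
   start at the odd register r1, so that ldrd/strd remain usable; and
   only MODE_INT values may live in the fake frame/arg pointer
   registers.  */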
14909 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14910 not used in arm mode. */
14913 arm_regno_class (int regno)
14917 if (regno == STACK_POINTER_REGNUM)
14919 if (regno == CC_REGNUM)
14926 if (TARGET_THUMB2 && regno < 8)
14929 if ( regno <= LAST_ARM_REGNUM
14930 || regno == FRAME_POINTER_REGNUM
14931 || regno == ARG_POINTER_REGNUM)
14932 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14934 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14935 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14937 if (IS_CIRRUS_REGNUM (regno))
14938 return CIRRUS_REGS;
14940 if (IS_VFP_REGNUM (regno))
14942 if (regno <= D7_VFP_REGNUM)
14943 return VFP_D0_D7_REGS;
14944 else if (regno <= LAST_LO_VFP_REGNUM)
14945 return VFP_LO_REGS;
14947 return VFP_HI_REGS;
14950 if (IS_IWMMXT_REGNUM (regno))
14951 return IWMMXT_REGS;
14953 if (IS_IWMMXT_GR_REGNUM (regno))
14954 return IWMMXT_GR_REGS;
14959 /* Handle a special case when computing the offset
14960 of an argument from the frame pointer. */
14962 arm_debugger_arg_offset (int value, rtx addr)
14966 /* We are only interested if dbxout_parms() failed to compute the offset. */
14970 /* We can only cope with the case where the address is held in a register. */
14971 if (GET_CODE (addr) != REG)
14974 /* If we are using the frame pointer to point at the argument, then
14975 an offset of 0 is correct. */
14976 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14979 /* If we are using the stack pointer to point at the
14980 argument, then an offset of 0 is correct. */
14981 /* ??? Check this is consistent with thumb2 frame layout. */
14982 if ((TARGET_THUMB || !frame_pointer_needed)
14983 && REGNO (addr) == SP_REGNUM)
14986 /* Oh dear. The argument is pointed to by a register rather
14987 than being held in a register, or being stored at a known
14988 offset from the frame pointer. Since GDB only understands
14989 those two kinds of argument we must translate the address
14990 held in the register into an offset from the frame pointer.
14991 We do this by searching through the insns for the function
14992 looking to see where this register gets its value. If the
14993 register is initialized from the frame pointer plus an offset
14994 then we are in luck and we can continue, otherwise we give up.
14996 This code is exercised by producing debugging information
14997 for a function with arguments like this:
14999 double func (double a, double b, int c, double d) {return d;}
15001 Without this code the stab for parameter 'd' will be set to
15002 an offset of 0 from the frame pointer, rather than 8. */
15004 /* The if() statement says:
15006 If the insn is a normal instruction
15007 and if the insn is setting the value in a register
15008 and if the register being set is the register holding the address of the argument
15009 and if the address is computed by an addition
15010 that involves adding to a register
15011 which is the frame pointer
15016 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15018 if ( GET_CODE (insn) == INSN
15019 && GET_CODE (PATTERN (insn)) == SET
15020 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
15021 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
15022 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
15023 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
15024 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
15027 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
15036 warning (0, "unable to compute real location of stacked parameter");
15037 value = 8; /* XXX magic hack */
15043 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
15046 if ((MASK) & insn_flags) \
15047 add_builtin_function ((NAME), (TYPE), (CODE), \
15048 BUILT_IN_MD, NULL, NULL_TREE); \
15052 struct builtin_description
15054 const unsigned int mask;
15055 const enum insn_code icode;
15056 const char * const name;
15057 const enum arm_builtins code;
15058 const enum rtx_code comparison;
15059 const unsigned int flag;
15062 static const struct builtin_description bdesc_2arg[] =
15064 #define IWMMXT_BUILTIN(code, string, builtin) \
15065 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
15066 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
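/* Expansion sketch (illustrative): IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   below yields the initializer

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
*/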
15068 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
15069 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
15070 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
15071 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
15072 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
15073 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
15074 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
15075 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
15076 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
15077 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
15078 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
15079 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
15080 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
15081 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
15082 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
15083 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
15084 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
15085 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
15086 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
15087 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
15088 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
15089 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
15090 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
15091 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
15092 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
15093 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
15094 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
15095 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
15096 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
15097 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
15098 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
15099 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
15100 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
15101 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
15102 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
15103 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
15104 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
15105 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
15106 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
15107 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
15108 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
15109 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
15110 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
15111 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
15112 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
15113 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
15114 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
15115 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
15116 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
15117 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
15118 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
15119 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
15120 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
15121 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
15122 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
15123 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
15124 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
15125 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
15127 #define IWMMXT_BUILTIN2(code, builtin) \
15128 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
15130 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
15131 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
15132 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
15133 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
15134 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
15135 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
15136 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
15137 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
15138 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
15139 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
15140 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
15141 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
15142 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
15143 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
15144 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
15145 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
15146 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
15147 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
15148 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
15149 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
15150 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
15151 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
15152 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
15153 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
15154 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
15155 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
15156 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
15157 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
15158 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
15159 IWMMXT_BUILTIN2 (rordi3, WRORDI)
15160 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
15161 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
15164 static const struct builtin_description bdesc_1arg[] =
15166 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
15167 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
15168 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
15169 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
15170 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
15171 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
15172 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
15173 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
15174 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
15175 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
15176 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
15177 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
15178 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
15179 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
15180 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
15181 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
15182 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
15183 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
15186 /* Set up all the iWMMXt builtins. This is
15187 not called if TARGET_IWMMXT is zero. */
15190 arm_init_iwmmxt_builtins (void)
15192 const struct builtin_description * d;
15194 tree endlink = void_list_node;
15196 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15197 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15198 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15200 tree int_ftype_int
15201 = build_function_type (integer_type_node,
15202 tree_cons (NULL_TREE, integer_type_node, endlink));
15203 tree v8qi_ftype_v8qi_v8qi_int
15204 = build_function_type (V8QI_type_node,
15205 tree_cons (NULL_TREE, V8QI_type_node,
15206 tree_cons (NULL_TREE, V8QI_type_node,
15207 tree_cons (NULL_TREE,
15210 tree v4hi_ftype_v4hi_int
15211 = build_function_type (V4HI_type_node,
15212 tree_cons (NULL_TREE, V4HI_type_node,
15213 tree_cons (NULL_TREE, integer_type_node,
15215 tree v2si_ftype_v2si_int
15216 = build_function_type (V2SI_type_node,
15217 tree_cons (NULL_TREE, V2SI_type_node,
15218 tree_cons (NULL_TREE, integer_type_node,
15220 tree v2si_ftype_di_di
15221 = build_function_type (V2SI_type_node,
15222 tree_cons (NULL_TREE, long_long_integer_type_node,
15223 tree_cons (NULL_TREE, long_long_integer_type_node,
15225 tree di_ftype_di_int
15226 = build_function_type (long_long_integer_type_node,
15227 tree_cons (NULL_TREE, long_long_integer_type_node,
15228 tree_cons (NULL_TREE, integer_type_node,
15230 tree di_ftype_di_int_int
15231 = build_function_type (long_long_integer_type_node,
15232 tree_cons (NULL_TREE, long_long_integer_type_node,
15233 tree_cons (NULL_TREE, integer_type_node,
15234 tree_cons (NULL_TREE,
15237 tree int_ftype_v8qi
15238 = build_function_type (integer_type_node,
15239 tree_cons (NULL_TREE, V8QI_type_node,
15241 tree int_ftype_v4hi
15242 = build_function_type (integer_type_node,
15243 tree_cons (NULL_TREE, V4HI_type_node,
15245 tree int_ftype_v2si
15246 = build_function_type (integer_type_node,
15247 tree_cons (NULL_TREE, V2SI_type_node,
15249 tree int_ftype_v8qi_int
15250 = build_function_type (integer_type_node,
15251 tree_cons (NULL_TREE, V8QI_type_node,
15252 tree_cons (NULL_TREE, integer_type_node,
15254 tree int_ftype_v4hi_int
15255 = build_function_type (integer_type_node,
15256 tree_cons (NULL_TREE, V4HI_type_node,
15257 tree_cons (NULL_TREE, integer_type_node,
15259 tree int_ftype_v2si_int
15260 = build_function_type (integer_type_node,
15261 tree_cons (NULL_TREE, V2SI_type_node,
15262 tree_cons (NULL_TREE, integer_type_node,
15264 tree v8qi_ftype_v8qi_int_int
15265 = build_function_type (V8QI_type_node,
15266 tree_cons (NULL_TREE, V8QI_type_node,
15267 tree_cons (NULL_TREE, integer_type_node,
15268 tree_cons (NULL_TREE,
15271 tree v4hi_ftype_v4hi_int_int
15272 = build_function_type (V4HI_type_node,
15273 tree_cons (NULL_TREE, V4HI_type_node,
15274 tree_cons (NULL_TREE, integer_type_node,
15275 tree_cons (NULL_TREE,
15278 tree v2si_ftype_v2si_int_int
15279 = build_function_type (V2SI_type_node,
15280 tree_cons (NULL_TREE, V2SI_type_node,
15281 tree_cons (NULL_TREE, integer_type_node,
15282 tree_cons (NULL_TREE,
15285 /* Miscellaneous. */
15286 tree v8qi_ftype_v4hi_v4hi
15287 = build_function_type (V8QI_type_node,
15288 tree_cons (NULL_TREE, V4HI_type_node,
15289 tree_cons (NULL_TREE, V4HI_type_node,
15291 tree v4hi_ftype_v2si_v2si
15292 = build_function_type (V4HI_type_node,
15293 tree_cons (NULL_TREE, V2SI_type_node,
15294 tree_cons (NULL_TREE, V2SI_type_node,
15296 tree v2si_ftype_v4hi_v4hi
15297 = build_function_type (V2SI_type_node,
15298 tree_cons (NULL_TREE, V4HI_type_node,
15299 tree_cons (NULL_TREE, V4HI_type_node,
15301 tree v2si_ftype_v8qi_v8qi
15302 = build_function_type (V2SI_type_node,
15303 tree_cons (NULL_TREE, V8QI_type_node,
15304 tree_cons (NULL_TREE, V8QI_type_node,
15306 tree v4hi_ftype_v4hi_di
15307 = build_function_type (V4HI_type_node,
15308 tree_cons (NULL_TREE, V4HI_type_node,
15309 tree_cons (NULL_TREE,
15310 long_long_integer_type_node,
15312 tree v2si_ftype_v2si_di
15313 = build_function_type (V2SI_type_node,
15314 tree_cons (NULL_TREE, V2SI_type_node,
15315 tree_cons (NULL_TREE,
15316 long_long_integer_type_node,
15318 tree void_ftype_int_int
15319 = build_function_type (void_type_node,
15320 tree_cons (NULL_TREE, integer_type_node,
15321 tree_cons (NULL_TREE, integer_type_node,
15323 tree di_ftype_void
15324 = build_function_type (long_long_unsigned_type_node, endlink);
15325 tree di_ftype_v8qi
15326 = build_function_type (long_long_integer_type_node,
15327 tree_cons (NULL_TREE, V8QI_type_node,
15329 tree di_ftype_v4hi
15330 = build_function_type (long_long_integer_type_node,
15331 tree_cons (NULL_TREE, V4HI_type_node,
15333 tree di_ftype_v2si
15334 = build_function_type (long_long_integer_type_node,
15335 tree_cons (NULL_TREE, V2SI_type_node,
15337 tree v2si_ftype_v4hi
15338 = build_function_type (V2SI_type_node,
15339 tree_cons (NULL_TREE, V4HI_type_node,
15341 tree v4hi_ftype_v8qi
15342 = build_function_type (V4HI_type_node,
15343 tree_cons (NULL_TREE, V8QI_type_node,
15346 tree di_ftype_di_v4hi_v4hi
15347 = build_function_type (long_long_unsigned_type_node,
15348 tree_cons (NULL_TREE,
15349 long_long_unsigned_type_node,
15350 tree_cons (NULL_TREE, V4HI_type_node,
15351 tree_cons (NULL_TREE,
15355 tree di_ftype_v4hi_v4hi
15356 = build_function_type (long_long_unsigned_type_node,
15357 tree_cons (NULL_TREE, V4HI_type_node,
15358 tree_cons (NULL_TREE, V4HI_type_node,
15361 /* Normal vector binops. */
15362 tree v8qi_ftype_v8qi_v8qi
15363 = build_function_type (V8QI_type_node,
15364 tree_cons (NULL_TREE, V8QI_type_node,
15365 tree_cons (NULL_TREE, V8QI_type_node,
15367 tree v4hi_ftype_v4hi_v4hi
15368 = build_function_type (V4HI_type_node,
15369 tree_cons (NULL_TREE, V4HI_type_node,
15370 tree_cons (NULL_TREE, V4HI_type_node,
15372 tree v2si_ftype_v2si_v2si
15373 = build_function_type (V2SI_type_node,
15374 tree_cons (NULL_TREE, V2SI_type_node,
15375 tree_cons (NULL_TREE, V2SI_type_node,
15377 tree di_ftype_di_di
15378 = build_function_type (long_long_unsigned_type_node,
15379 tree_cons (NULL_TREE, long_long_unsigned_type_node,
15380 tree_cons (NULL_TREE,
15381 long_long_unsigned_type_node,
15384 /* Add all builtins that are more or less simple operations on two
15385 operands. */
15386 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15388 /* Use one of the operands; the target can have a different mode for
15389 mask-generating compares. */
15390 enum machine_mode mode;
15396 mode = insn_data[d->icode].operand[1].mode;
15401 type = v8qi_ftype_v8qi_v8qi;
15404 type = v4hi_ftype_v4hi_v4hi;
15407 type = v2si_ftype_v2si_v2si;
15410 type = di_ftype_di_di;
15414 gcc_unreachable ();
15417 def_mbuiltin (d->mask, d->name, type, d->code);
15420 /* Add the remaining MMX insns with somewhat more complicated types. */
15421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
15422 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
15423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
15425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
15426 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
15427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
15428 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
15429 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
15430 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
15432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
15433 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
15434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
15435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
15436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
15437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
15439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
15440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
15441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
15442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
15443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
15444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
15446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
15447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
15448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
15449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
15450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
15451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
15453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
15455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
15456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
15457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
15458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
15460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
15461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
15462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
15463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
15464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
15465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
15466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
15467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
15468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
15470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
15471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
15472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
15474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
15475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
15476 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
15478 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
15479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
15480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
15481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
15482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
15483 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
15485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
15486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
15487 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
15488 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
15489 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
15490 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
15491 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
15492 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
15493 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
15494 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
15495 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
15496 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
15498 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
15499 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
15500 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
15501 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
15503 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
15504 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
15505 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
15506 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
15507 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
15508 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
15509 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
15513 arm_init_tls_builtins (void)
15517 ftype = build_function_type (ptr_type_node, void_list_node);
15518 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
15519 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
15521 TREE_NOTHROW (decl) = 1;
15522 TREE_READONLY (decl) = 1;
15525 enum neon_builtin_type_bits {
15541 #define v8qi_UP T_V8QI
15542 #define v4hi_UP T_V4HI
15543 #define v2si_UP T_V2SI
15544 #define v2sf_UP T_V2SF
15546 #define v16qi_UP T_V16QI
15547 #define v8hi_UP T_V8HI
15548 #define v4si_UP T_V4SI
15549 #define v4sf_UP T_V4SF
15550 #define v2di_UP T_V2DI
15555 #define UP(X) X##_UP
15590 NEON_LOADSTRUCTLANE,
15592 NEON_STORESTRUCTLANE,
15601 const neon_itype itype;
15603 const enum insn_code codes[T_MAX];
15604 const unsigned int num_vars;
15605 unsigned int base_fcode;
15606 } neon_builtin_datum;
15608 #define CF(N,X) CODE_FOR_neon_##N##X
15610 #define VAR1(T, N, A) \
15611 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
15612 #define VAR2(T, N, A, B) \
15613 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
15614 #define VAR3(T, N, A, B, C) \
15615 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
15616 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
15617 #define VAR4(T, N, A, B, C, D) \
15618 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
15619 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
15620 #define VAR5(T, N, A, B, C, D, E) \
15621 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
15622 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
15623 #define VAR6(T, N, A, B, C, D, E, F) \
15624 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
15625 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
15626 #define VAR7(T, N, A, B, C, D, E, F, G) \
15627 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
15628 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15630 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
15631 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15633 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15634 CF (N, G), CF (N, H) }, 8, 0
15635 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
15636 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15637 | UP (H) | UP (I), \
15638 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15639 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
15640 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
15641 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15642 | UP (H) | UP (I) | UP (J), \
15643 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15644 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
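/* For example, given the macros above, the table entry
     { VAR2 (BINOP, vqdmull, v4hi, v2si) }
   expands roughly to
     { "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
       { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0 }
   i.e. a name, an itype, a bitmask of type variants, the matching
   insn codes, the variant count, and a base function code (filled in
   when the builtins are registered below).  */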
15646 /* The mode entries in the following table correspond to the "key" type of the
15647 instruction variant, i.e. equivalent to that which would be specified after
15648 the assembler mnemonic, which usually refers to the last vector operand.
15649 (Signed/unsigned/polynomial types are not distinguished, though; they are
15650 all mapped onto the same mode for a given element size.) The modes
15651 listed per instruction should be the same as those defined for that
15652 instruction's pattern in neon.md.
15653 WARNING: Variants should be listed in the same increasing order as
15654 neon_builtin_type_bits. */
15656 static neon_builtin_datum neon_builtin_data[] =
15658 { VAR10 (BINOP, vadd,
15659 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15660 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
15661 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
15662 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15663 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15664 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
15665 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15666 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15667 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
15668 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15669 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
15670 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
15671 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
15672 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
15673 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
15674 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
15675 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
15676 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
15677 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
15678 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
15679 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
15680 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
15681 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15682 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15683 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15684 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
15685 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
15686 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
15687 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15688 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15689 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15690 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
15691 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15692 { VAR10 (BINOP, vsub,
15693 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15694 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
15695 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
15696 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15697 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15698 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
15699 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15700 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15701 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15702 { VAR2 (BINOP, vcage, v2sf, v4sf) },
15703 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
15704 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15705 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15706 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
15707 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15708 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
15709 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15710 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15711 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
15712 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15713 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15714 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
15715 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
15716 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
15717 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
15718 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15719 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15720 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15721 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15722 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15723 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15724 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15725 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15726 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
15727 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
15728 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
15729 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15730 /* FIXME: vget_lane supports more variants than this! */
15731 { VAR10 (GETLANE, vget_lane,
15732 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15733 { VAR10 (SETLANE, vset_lane,
15734 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15735 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
15736 { VAR10 (DUP, vdup_n,
15737 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15738 { VAR10 (DUPLANE, vdup_lane,
15739 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15740 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
15741 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
15742 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
15743 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
15744 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
15745 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
15746 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
15747 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15748 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15749 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
15750 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
15751 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15752 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
15753 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
15754 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15755 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15756 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
15757 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
15758 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15759 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
15760 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
15761 { VAR10 (BINOP, vext,
15762 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15763 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15764 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
15765 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
15766 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
15767 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
15768 { VAR10 (SELECT, vbsl,
15769 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15770 { VAR1 (VTBL, vtbl1, v8qi) },
15771 { VAR1 (VTBL, vtbl2, v8qi) },
15772 { VAR1 (VTBL, vtbl3, v8qi) },
15773 { VAR1 (VTBL, vtbl4, v8qi) },
15774 { VAR1 (VTBX, vtbx1, v8qi) },
15775 { VAR1 (VTBX, vtbx2, v8qi) },
15776 { VAR1 (VTBX, vtbx3, v8qi) },
15777 { VAR1 (VTBX, vtbx4, v8qi) },
15778 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15779 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15780 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15781 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
15782 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
15783 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
15784 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
15785 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
15786 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
15787 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
15788 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
15789 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
15790 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
15791 { VAR10 (LOAD1, vld1,
15792 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15793 { VAR10 (LOAD1LANE, vld1_lane,
15794 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15795 { VAR10 (LOAD1, vld1_dup,
15796 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15797 { VAR10 (STORE1, vst1,
15798 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15799 { VAR10 (STORE1LANE, vst1_lane,
15800 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15801 { VAR9 (LOADSTRUCT,
15802 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15803 { VAR7 (LOADSTRUCTLANE, vld2_lane,
15804 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15805 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
15806 { VAR9 (STORESTRUCT, vst2,
15807 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15808 { VAR7 (STORESTRUCTLANE, vst2_lane,
15809 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15810 { VAR9 (LOADSTRUCT,
15811 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15812 { VAR7 (LOADSTRUCTLANE, vld3_lane,
15813 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15814 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
15815 { VAR9 (STORESTRUCT, vst3,
15816 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15817 { VAR7 (STORESTRUCTLANE, vst3_lane,
15818 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15819 { VAR9 (LOADSTRUCT, vld4,
15820 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15821 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15822 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15823 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15824 { VAR9 (STORESTRUCT, vst4,
15825 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15826 { VAR7 (STORESTRUCTLANE, vst4_lane,
15827 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15828 { VAR10 (LOGICBINOP, vand,
15829 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15830 { VAR10 (LOGICBINOP, vorr,
15831 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15832 { VAR10 (BINOP, veor,
15833 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15834 { VAR10 (LOGICBINOP, vbic,
15835 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15836 { VAR10 (LOGICBINOP, vorn,
15837 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15853 arm_init_neon_builtins (void)
15855 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15857 tree neon_intQI_type_node;
15858 tree neon_intHI_type_node;
15859 tree neon_polyQI_type_node;
15860 tree neon_polyHI_type_node;
15861 tree neon_intSI_type_node;
15862 tree neon_intDI_type_node;
15863 tree neon_float_type_node;
15865 tree intQI_pointer_node;
15866 tree intHI_pointer_node;
15867 tree intSI_pointer_node;
15868 tree intDI_pointer_node;
15869 tree float_pointer_node;
15871 tree const_intQI_node;
15872 tree const_intHI_node;
15873 tree const_intSI_node;
15874 tree const_intDI_node;
15875 tree const_float_node;
15877 tree const_intQI_pointer_node;
15878 tree const_intHI_pointer_node;
15879 tree const_intSI_pointer_node;
15880 tree const_intDI_pointer_node;
15881 tree const_float_pointer_node;
15883 tree V8QI_type_node;
15884 tree V4HI_type_node;
15885 tree V2SI_type_node;
15886 tree V2SF_type_node;
15887 tree V16QI_type_node;
15888 tree V8HI_type_node;
15889 tree V4SI_type_node;
15890 tree V4SF_type_node;
15891 tree V2DI_type_node;
15893 tree intUQI_type_node;
15894 tree intUHI_type_node;
15895 tree intUSI_type_node;
15896 tree intUDI_type_node;
15898 tree intEI_type_node;
15899 tree intOI_type_node;
15900 tree intCI_type_node;
15901 tree intXI_type_node;
15903 tree V8QI_pointer_node;
15904 tree V4HI_pointer_node;
15905 tree V2SI_pointer_node;
15906 tree V2SF_pointer_node;
15907 tree V16QI_pointer_node;
15908 tree V8HI_pointer_node;
15909 tree V4SI_pointer_node;
15910 tree V4SF_pointer_node;
15911 tree V2DI_pointer_node;
15913 tree void_ftype_pv8qi_v8qi_v8qi;
15914 tree void_ftype_pv4hi_v4hi_v4hi;
15915 tree void_ftype_pv2si_v2si_v2si;
15916 tree void_ftype_pv2sf_v2sf_v2sf;
15917 tree void_ftype_pdi_di_di;
15918 tree void_ftype_pv16qi_v16qi_v16qi;
15919 tree void_ftype_pv8hi_v8hi_v8hi;
15920 tree void_ftype_pv4si_v4si_v4si;
15921 tree void_ftype_pv4sf_v4sf_v4sf;
15922 tree void_ftype_pv2di_v2di_v2di;
15924 tree reinterp_ftype_dreg[5][5];
15925 tree reinterp_ftype_qreg[5][5];
15926 tree dreg_types[5], qreg_types[5];
15928 /* Create distinguished type nodes for NEON vector element types,
15929 and pointers to values of such types, so we can detect them later. */
15930 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15931 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15932 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15933 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15934 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15935 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15936 neon_float_type_node = make_node (REAL_TYPE);
15937 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15938 layout_type (neon_float_type_node);
15940 /* Define typedefs which exactly correspond to the modes we are basing vector
15941 types on. If you change these names you'll need to change
15942 the table used by arm_mangle_type too. */
15943 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15944 "__builtin_neon_qi");
15945 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15946 "__builtin_neon_hi");
15947 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15948 "__builtin_neon_si");
15949 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15950 "__builtin_neon_sf");
15951 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15952 "__builtin_neon_di");
15953 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15954 "__builtin_neon_poly8");
15955 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15956 "__builtin_neon_poly16");
15958 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15959 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15960 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15961 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15962 float_pointer_node = build_pointer_type (neon_float_type_node);
15964 /* Next create constant-qualified versions of the above types. */
15965 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15967 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15969 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15971 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15973 const_float_node = build_qualified_type (neon_float_type_node,
15976 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15977 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15978 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15979 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15980 const_float_pointer_node = build_pointer_type (const_float_node);
15982 /* Now create vector types based on our NEON element types. */
15983 /* 64-bit vectors. */
15985 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15987 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15989 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15991 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15992 /* 128-bit vectors. */
15994 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15996 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15998 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
16000 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
16002 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
16004 /* Unsigned integer types for various mode sizes. */
16005 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
16006 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
16007 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
16008 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
16010 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
16011 "__builtin_neon_uqi");
16012 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
16013 "__builtin_neon_uhi");
16014 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
16015 "__builtin_neon_usi");
16016 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
16017 "__builtin_neon_udi");
16019 /* Opaque integer types for structures of vectors. */
16020 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
16021 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
16022 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
16023 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
16025 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
16026 "__builtin_neon_ti");
16027 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
16028 "__builtin_neon_ei");
16029 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
16030 "__builtin_neon_oi");
16031 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
16032 "__builtin_neon_ci");
16033 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
16034 "__builtin_neon_xi");
16036 /* Pointers to vector types. */
16037 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
16038 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
16039 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
16040 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
16041 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
16042 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
16043 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
16044 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
16045 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
16047 /* Operations which return results as pairs. */
16048 void_ftype_pv8qi_v8qi_v8qi =
16049 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
16050 V8QI_type_node, NULL);
16051 void_ftype_pv4hi_v4hi_v4hi =
16052 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
16053 V4HI_type_node, NULL);
16054 void_ftype_pv2si_v2si_v2si =
16055 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
16056 V2SI_type_node, NULL);
16057 void_ftype_pv2sf_v2sf_v2sf =
16058 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
16059 V2SF_type_node, NULL);
16060 void_ftype_pdi_di_di =
16061 build_function_type_list (void_type_node, intDI_pointer_node,
16062 neon_intDI_type_node, neon_intDI_type_node, NULL);
16063 void_ftype_pv16qi_v16qi_v16qi =
16064 build_function_type_list (void_type_node, V16QI_pointer_node,
16065 V16QI_type_node, V16QI_type_node, NULL);
16066 void_ftype_pv8hi_v8hi_v8hi =
16067 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
16068 V8HI_type_node, NULL);
16069 void_ftype_pv4si_v4si_v4si =
16070 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
16071 V4SI_type_node, NULL);
16072 void_ftype_pv4sf_v4sf_v4sf =
16073 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
16074 V4SF_type_node, NULL);
16075 void_ftype_pv2di_v2di_v2di =
16076 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
16077 V2DI_type_node, NULL);
16079 dreg_types[0] = V8QI_type_node;
16080 dreg_types[1] = V4HI_type_node;
16081 dreg_types[2] = V2SI_type_node;
16082 dreg_types[3] = V2SF_type_node;
16083 dreg_types[4] = neon_intDI_type_node;
16085 qreg_types[0] = V16QI_type_node;
16086 qreg_types[1] = V8HI_type_node;
16087 qreg_types[2] = V4SI_type_node;
16088 qreg_types[3] = V4SF_type_node;
16089 qreg_types[4] = V2DI_type_node;
16091 for (i = 0; i < 5; i++)
16094 for (j = 0; j < 5; j++)
16096 reinterp_ftype_dreg[i][j]
16097 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
16098 reinterp_ftype_qreg[i][j]
16099 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
16103 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
16105 neon_builtin_datum *d = &neon_builtin_data[i];
16106 unsigned int j, codeidx = 0;
16108 d->base_fcode = fcode;
16110 for (j = 0; j < T_MAX; j++)
16112 const char* const modenames[] = {
16113 "v8qi", "v4hi", "v2si", "v2sf", "di",
16114 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
16118 enum insn_code icode;
16119 int is_load = 0, is_store = 0;
16121 if ((d->bits & (1 << j)) == 0)
16124 icode = d->codes[codeidx++];
16129 case NEON_LOAD1LANE:
16130 case NEON_LOADSTRUCT:
16131 case NEON_LOADSTRUCTLANE:
16133 /* Fall through. */
16135 case NEON_STORE1LANE:
16136 case NEON_STORESTRUCT:
16137 case NEON_STORESTRUCTLANE:
16140 /* Fall through. */
16143 case NEON_LOGICBINOP:
16144 case NEON_SHIFTINSERT:
16151 case NEON_SHIFTIMM:
16152 case NEON_SHIFTACC:
16158 case NEON_LANEMULL:
16159 case NEON_LANEMULH:
16161 case NEON_SCALARMUL:
16162 case NEON_SCALARMULL:
16163 case NEON_SCALARMULH:
16164 case NEON_SCALARMAC:
16170 tree return_type = void_type_node, args = void_list_node;
16172 /* Build a function type directly from the insn_data for this
16173 builtin. The build_function_type() function takes care of
16174 removing duplicates for us. */
16175 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
16179 if (is_load && k == 1)
16181 /* Neon load patterns always have the memory operand
16182 (a SImode pointer) in the operand 1 position. We
16183 want a const pointer to the element type in that
16185 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16191 eltype = const_intQI_pointer_node;
16196 eltype = const_intHI_pointer_node;
16201 eltype = const_intSI_pointer_node;
16206 eltype = const_float_pointer_node;
16211 eltype = const_intDI_pointer_node;
16214 default: gcc_unreachable ();
16217 else if (is_store && k == 0)
16219 /* Similarly, Neon store patterns use operand 0 as
16220 the memory location to store to (a SImode pointer).
16221 Use a pointer to the element type of the store in
16223 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16229 eltype = intQI_pointer_node;
16234 eltype = intHI_pointer_node;
16239 eltype = intSI_pointer_node;
16244 eltype = float_pointer_node;
16249 eltype = intDI_pointer_node;
16252 default: gcc_unreachable ();
16257 switch (insn_data[icode].operand[k].mode)
16259 case VOIDmode: eltype = void_type_node; break;
16261 case QImode: eltype = neon_intQI_type_node; break;
16262 case HImode: eltype = neon_intHI_type_node; break;
16263 case SImode: eltype = neon_intSI_type_node; break;
16264 case SFmode: eltype = neon_float_type_node; break;
16265 case DImode: eltype = neon_intDI_type_node; break;
16266 case TImode: eltype = intTI_type_node; break;
16267 case EImode: eltype = intEI_type_node; break;
16268 case OImode: eltype = intOI_type_node; break;
16269 case CImode: eltype = intCI_type_node; break;
16270 case XImode: eltype = intXI_type_node; break;
16271 /* 64-bit vectors. */
16272 case V8QImode: eltype = V8QI_type_node; break;
16273 case V4HImode: eltype = V4HI_type_node; break;
16274 case V2SImode: eltype = V2SI_type_node; break;
16275 case V2SFmode: eltype = V2SF_type_node; break;
16276 /* 128-bit vectors. */
16277 case V16QImode: eltype = V16QI_type_node; break;
16278 case V8HImode: eltype = V8HI_type_node; break;
16279 case V4SImode: eltype = V4SI_type_node; break;
16280 case V4SFmode: eltype = V4SF_type_node; break;
16281 case V2DImode: eltype = V2DI_type_node; break;
16282 default: gcc_unreachable ();
16286 if (k == 0 && !is_store)
16287 return_type = eltype;
16289 args = tree_cons (NULL_TREE, eltype, args);
16292 ftype = build_function_type (return_type, args);
16296 case NEON_RESULTPAIR:
16298 switch (insn_data[icode].operand[1].mode)
16300 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
16301 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
16302 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
16303 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
16304 case DImode: ftype = void_ftype_pdi_di_di; break;
16305 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
16306 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
16307 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
16308 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
16309 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
16310 default: gcc_unreachable ();
16315 case NEON_REINTERP:
16317 /* We iterate over 5 doubleword types, then 5 quadword
16320 switch (insn_data[icode].operand[0].mode)
16322 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
16323 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
16324 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
16325 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
16326 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
16327 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
16328 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
16329 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
16330 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
16331 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
16332 default: gcc_unreachable ();
16338 gcc_unreachable ();
16341 gcc_assert (ftype != NULL);
16343 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
16345 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
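/* For the VAR2 (BINOP, vqdmull, v4hi, v2si) entry above, for
   instance, this registers __builtin_neon_vqdmullv4hi and
   __builtin_neon_vqdmullv2si, with consecutive function codes
   starting at d->base_fcode.  */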
16352 arm_init_fp16_builtins (void)
16354 tree fp16_type = make_node (REAL_TYPE);
16355 TYPE_PRECISION (fp16_type) = 16;
16356 layout_type (fp16_type);
16357 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
16361 arm_init_builtins (void)
16363 arm_init_tls_builtins ();
16365 if (TARGET_REALLY_IWMMXT)
16366 arm_init_iwmmxt_builtins ();
16369 arm_init_neon_builtins ();
16371 if (arm_fp16_format)
16372 arm_init_fp16_builtins ();
16375 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
16377 static const char *
16378 arm_invalid_parameter_type (const_tree t)
16380 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16381 return N_("function parameters cannot have __fp16 type");
16385 /* Implement TARGET_INVALID_RETURN_TYPE. */
16387 static const char *
16388 arm_invalid_return_type (const_tree t)
16390 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16391 return N_("functions cannot return __fp16 type");
16395 /* Implement TARGET_PROMOTED_TYPE. */
16398 arm_promoted_type (const_tree t)
16400 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16401 return float_type_node;
16405 /* Implement TARGET_CONVERT_TO_TYPE.
16406 Specifically, this hook implements the peculiarity of the ARM
16407 half-precision floating-point C semantics that requires conversions between
16408 __fp16 and double to go through an intermediate conversion to float. */
16411 arm_convert_to_type (tree type, tree expr)
16413 tree fromtype = TREE_TYPE (expr);
16414 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
16416 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
16417 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
16418 return convert (type, convert (float_type_node, expr));
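/* The effect, sketched as hypothetical user code:

     __fp16 h = 1.0;
     double d = h;    -- expanded as (double) (float) h

   and symmetrically when narrowing a double back to __fp16.  */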
16422 /* Errors in the source file can cause expand_expr to return const0_rtx
16423 where we expect a vector. To avoid crashing, use one of the vector
16424 clear instructions. */
16427 safe_vector_operand (rtx x, enum machine_mode mode)
16429 if (x != const0_rtx)
16431 x = gen_reg_rtx (mode);
16433 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
16434 : gen_rtx_SUBREG (DImode, x, 0)));
16438 /* Subroutine of arm_expand_builtin to take care of binop insns. */
16441 arm_expand_binop_builtin (enum insn_code icode,
16442 tree exp, rtx target)
16445 tree arg0 = CALL_EXPR_ARG (exp, 0);
16446 tree arg1 = CALL_EXPR_ARG (exp, 1);
16447 rtx op0 = expand_normal (arg0);
16448 rtx op1 = expand_normal (arg1);
16449 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16450 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16451 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16453 if (VECTOR_MODE_P (mode0))
16454 op0 = safe_vector_operand (op0, mode0);
16455 if (VECTOR_MODE_P (mode1))
16456 op1 = safe_vector_operand (op1, mode1);
16459 || GET_MODE (target) != tmode
16460 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16461 target = gen_reg_rtx (tmode);
16463 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
16465 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16466 op0 = copy_to_mode_reg (mode0, op0);
16467 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16468 op1 = copy_to_mode_reg (mode1, op1);
16470 pat = GEN_FCN (icode) (target, op0, op1);
16477 /* Subroutine of arm_expand_builtin to take care of unop insns. */
16480 arm_expand_unop_builtin (enum insn_code icode,
16481 tree exp, rtx target, int do_load)
16484 tree arg0 = CALL_EXPR_ARG (exp, 0);
16485 rtx op0 = expand_normal (arg0);
16486 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16487 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16490 || GET_MODE (target) != tmode
16491 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16492 target = gen_reg_rtx (tmode);
16494 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16497 if (VECTOR_MODE_P (mode0))
16498 op0 = safe_vector_operand (op0, mode0);
16500 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16501 op0 = copy_to_mode_reg (mode0, op0);
16504 pat = GEN_FCN (icode) (target, op0);
16512 neon_builtin_compare (const void *a, const void *b)
16514 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
16515 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
16516 unsigned int soughtcode = key->base_fcode;
16518 if (soughtcode >= memb->base_fcode
16519 && soughtcode < memb->base_fcode + memb->num_vars)
16521 else if (soughtcode < memb->base_fcode)
16527 static enum insn_code
16528 locate_neon_builtin_icode (int fcode, neon_itype *itype)
16530 neon_builtin_datum key, *found;
16533 key.base_fcode = fcode;
16534 found = (neon_builtin_datum *)
16535 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
16536 sizeof (neon_builtin_data[0]), neon_builtin_compare);
16537 gcc_assert (found);
16538 idx = fcode - (int) found->base_fcode;
16539 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
16542 *itype = found->itype;
16544 return found->codes[idx];
16548 NEON_ARG_COPY_TO_REG,
16553 #define NEON_MAX_BUILTIN_ARGS 5
16555 /* Expand a Neon builtin. */
16557 arm_expand_neon_args (rtx target, int icode, int have_retval,
16562 tree arg[NEON_MAX_BUILTIN_ARGS];
16563 rtx op[NEON_MAX_BUILTIN_ARGS];
16564 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16565 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
16570 || GET_MODE (target) != tmode
16571 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
16572 target = gen_reg_rtx (tmode);
16574 va_start (ap, exp);
16578 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
16580 if (thisarg == NEON_ARG_STOP)
16584 arg[argc] = CALL_EXPR_ARG (exp, argc);
16585 op[argc] = expand_normal (arg[argc]);
16586 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
16590 case NEON_ARG_COPY_TO_REG:
16591 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
16592 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16593 (op[argc], mode[argc]))
16594 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
16597 case NEON_ARG_CONSTANT:
16598 /* FIXME: This error message is somewhat unhelpful. */
16599 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16600 (op[argc], mode[argc]))
16601 error ("argument must be a constant");
16604 case NEON_ARG_STOP:
16605 gcc_unreachable ();
16618 pat = GEN_FCN (icode) (target, op[0]);
16622 pat = GEN_FCN (icode) (target, op[0], op[1]);
16626 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
16630 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
16634 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
16638 gcc_unreachable ();
16644 pat = GEN_FCN (icode) (op[0]);
16648 pat = GEN_FCN (icode) (op[0], op[1]);
16652 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
16656 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
16660 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
16664 gcc_unreachable ();
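/* Callers describe each operand with a NEON_ARG_STOP-terminated
   vararg list.  For instance (as the GETLANE case below does),

     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
                           NEON_ARG_STOP);

   expands a two-operand builtin whose second operand must be an
   immediate.  */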
16675 /* Expand a Neon builtin. These are "special" because they don't have symbolic
16676 constants defined per-instruction or per instruction-variant. Instead, the
16677 required info is looked up in the table neon_builtin_data. */
16679 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
16682 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
16689 return arm_expand_neon_args (target, icode, 1, exp,
16690 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16694 case NEON_SCALARMUL:
16695 case NEON_SCALARMULL:
16696 case NEON_SCALARMULH:
16697 case NEON_SHIFTINSERT:
16698 case NEON_LOGICBINOP:
16699 return arm_expand_neon_args (target, icode, 1, exp,
16700 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16704 return arm_expand_neon_args (target, icode, 1, exp,
16705 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16706 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16710 case NEON_SHIFTIMM:
16711 return arm_expand_neon_args (target, icode, 1, exp,
16712 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
16716 return arm_expand_neon_args (target, icode, 1, exp,
16717 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16721 case NEON_REINTERP:
16722 return arm_expand_neon_args (target, icode, 1, exp,
16723 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16727 return arm_expand_neon_args (target, icode, 1, exp,
16728 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16730 case NEON_RESULTPAIR:
16731 return arm_expand_neon_args (target, icode, 0, exp,
16732 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16736 case NEON_LANEMULL:
16737 case NEON_LANEMULH:
16738 return arm_expand_neon_args (target, icode, 1, exp,
16739 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16740 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16743 return arm_expand_neon_args (target, icode, 1, exp,
16744 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16745 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16747 case NEON_SHIFTACC:
16748 return arm_expand_neon_args (target, icode, 1, exp,
16749 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16750 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16752 case NEON_SCALARMAC:
16753 return arm_expand_neon_args (target, icode, 1, exp,
16754 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16755 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16759 return arm_expand_neon_args (target, icode, 1, exp,
16760 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16764 case NEON_LOADSTRUCT:
16765 return arm_expand_neon_args (target, icode, 1, exp,
16766 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16768 case NEON_LOAD1LANE:
16769 case NEON_LOADSTRUCTLANE:
16770 return arm_expand_neon_args (target, icode, 1, exp,
16771 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16775 case NEON_STORESTRUCT:
16776 return arm_expand_neon_args (target, icode, 0, exp,
16777 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16779 case NEON_STORE1LANE:
16780 case NEON_STORESTRUCTLANE:
16781 return arm_expand_neon_args (target, icode, 0, exp,
16782 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16786 gcc_unreachable ();
16789 /* Emit code to reinterpret one Neon type as another, without altering bits. */
16791 neon_reinterpret (rtx dest, rtx src)
16793 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
16796 /* Emit code to place a Neon pair result in memory locations (with equal
16799 neon_emit_pair_result_insn (enum machine_mode mode,
16800 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
16803 rtx mem = gen_rtx_MEM (mode, destaddr);
16804 rtx tmp1 = gen_reg_rtx (mode);
16805 rtx tmp2 = gen_reg_rtx (mode);
16807 emit_insn (intfn (tmp1, op1, tmp2, op2));
16809 emit_move_insn (mem, tmp1);
16810 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
16811 emit_move_insn (mem, tmp2);
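/* For the vtrn/vzip/vuzp RESULTPAIR builtins, for example, INTFN is
   the corresponding two-output pattern; the halves are computed into
   TMP1 and TMP2 and then stored back-to-back at DESTADDR,
   GET_MODE_SIZE (mode) bytes apart.  */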
16814 /* Set up operands for a register copy from src to dest, taking care not to
16815 clobber registers in the process.
16816 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
16817 be called with a large N, so that should be OK. */
16820 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
16822 unsigned int copied = 0, opctr = 0;
16823 unsigned int done = (1 << count) - 1;
16826 while (copied != done)
16828 for (i = 0; i < count; i++)
16832 for (j = 0; good && j < count; j++)
16833 if (i != j && (copied & (1 << j)) == 0
16834 && reg_overlap_mentioned_p (src[j], dest[i]))
16839 operands[opctr++] = dest[i];
16840 operands[opctr++] = src[i];
16846 gcc_assert (opctr == count * 2);
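/* A worked example: copying {d0, d1} into {d1, d2} must not emit
   "mov d1, d0" first, because d1 is still a live source for the
   second copy.  The scan above therefore picks "mov d2, d1" first
   (its destination overlaps no uncopied source) and only then
   "mov d1, d0".  Cyclic overlaps (e.g. a full swap) are assumed
   never to occur, or the loop would not terminate.  */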
16849 /* Expand an expression EXP that calls a built-in function,
16850 with result going to TARGET if that's convenient
16851 (and in mode MODE if that's convenient).
16852 SUBTARGET may be used as the target for computing one of EXP's operands.
16853 IGNORE is nonzero if the value is to be ignored. */
16856 arm_expand_builtin (tree exp,
16858 rtx subtarget ATTRIBUTE_UNUSED,
16859 enum machine_mode mode ATTRIBUTE_UNUSED,
16860 int ignore ATTRIBUTE_UNUSED)
16862 const struct builtin_description * d;
16863 enum insn_code icode;
16864 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16872 int fcode = DECL_FUNCTION_CODE (fndecl);
16874 enum machine_mode tmode;
16875 enum machine_mode mode0;
16876 enum machine_mode mode1;
16877 enum machine_mode mode2;
16879 if (fcode >= ARM_BUILTIN_NEON_BASE)
16880 return arm_expand_neon_builtin (fcode, exp, target);
16884 case ARM_BUILTIN_TEXTRMSB:
16885 case ARM_BUILTIN_TEXTRMUB:
16886 case ARM_BUILTIN_TEXTRMSH:
16887 case ARM_BUILTIN_TEXTRMUH:
16888 case ARM_BUILTIN_TEXTRMSW:
16889 case ARM_BUILTIN_TEXTRMUW:
16890 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16891 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16892 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16893 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16894 : CODE_FOR_iwmmxt_textrmw);
16896 arg0 = CALL_EXPR_ARG (exp, 0);
16897 arg1 = CALL_EXPR_ARG (exp, 1);
16898 op0 = expand_normal (arg0);
16899 op1 = expand_normal (arg1);
16900 tmode = insn_data[icode].operand[0].mode;
16901 mode0 = insn_data[icode].operand[1].mode;
16902 mode1 = insn_data[icode].operand[2].mode;
16904 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16905 op0 = copy_to_mode_reg (mode0, op0);
16906 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16908 /* @@@ better error message */
16909 error ("selector must be an immediate");
16910 return gen_reg_rtx (tmode);
16913 || GET_MODE (target) != tmode
16914 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16915 target = gen_reg_rtx (tmode);
16916 pat = GEN_FCN (icode) (target, op0, op1);
16922 case ARM_BUILTIN_TINSRB:
16923 case ARM_BUILTIN_TINSRH:
16924 case ARM_BUILTIN_TINSRW:
16925 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16926 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16927 : CODE_FOR_iwmmxt_tinsrw);
16928 arg0 = CALL_EXPR_ARG (exp, 0);
16929 arg1 = CALL_EXPR_ARG (exp, 1);
16930 arg2 = CALL_EXPR_ARG (exp, 2);
16931 op0 = expand_normal (arg0);
16932 op1 = expand_normal (arg1);
16933 op2 = expand_normal (arg2);
16934 tmode = insn_data[icode].operand[0].mode;
16935 mode0 = insn_data[icode].operand[1].mode;
16936 mode1 = insn_data[icode].operand[2].mode;
16937 mode2 = insn_data[icode].operand[3].mode;
16939 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16940 op0 = copy_to_mode_reg (mode0, op0);
16941 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16942 op1 = copy_to_mode_reg (mode1, op1);
16943 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16945 /* @@@ better error message */
16946 error ("selector must be an immediate");
16950 || GET_MODE (target) != tmode
16951 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16952 target = gen_reg_rtx (tmode);
16953 pat = GEN_FCN (icode) (target, op0, op1, op2);
16959 case ARM_BUILTIN_SETWCX:
16960 arg0 = CALL_EXPR_ARG (exp, 0);
16961 arg1 = CALL_EXPR_ARG (exp, 1);
16962 op0 = force_reg (SImode, expand_normal (arg0));
16963 op1 = expand_normal (arg1);
16964 emit_insn (gen_iwmmxt_tmcr (op1, op0));
16967 case ARM_BUILTIN_GETWCX:
16968 arg0 = CALL_EXPR_ARG (exp, 0);
16969 op0 = expand_normal (arg0);
16970 target = gen_reg_rtx (SImode);
16971 emit_insn (gen_iwmmxt_tmrc (target, op0));
16974 case ARM_BUILTIN_WSHUFH:
16975 icode = CODE_FOR_iwmmxt_wshufh;
16976 arg0 = CALL_EXPR_ARG (exp, 0);
16977 arg1 = CALL_EXPR_ARG (exp, 1);
16978 op0 = expand_normal (arg0);
16979 op1 = expand_normal (arg1);
16980 tmode = insn_data[icode].operand[0].mode;
16981 mode1 = insn_data[icode].operand[1].mode;
16982 mode2 = insn_data[icode].operand[2].mode;
16984 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16985 op0 = copy_to_mode_reg (mode1, op0);
16986 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16988 /* @@@ better error message */
16989 error ("mask must be an immediate");
16993 || GET_MODE (target) != tmode
16994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16995 target = gen_reg_rtx (tmode);
16996 pat = GEN_FCN (icode) (target, op0, op1);
17002 case ARM_BUILTIN_WSADB:
17003 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
17004 case ARM_BUILTIN_WSADH:
17005 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
17006 case ARM_BUILTIN_WSADBZ:
17007 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
17008 case ARM_BUILTIN_WSADHZ:
17009 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
17011 /* Several three-argument builtins. */
17012 case ARM_BUILTIN_WMACS:
17013 case ARM_BUILTIN_WMACU:
17014 case ARM_BUILTIN_WALIGN:
17015 case ARM_BUILTIN_TMIA:
17016 case ARM_BUILTIN_TMIAPH:
17017 case ARM_BUILTIN_TMIATT:
17018 case ARM_BUILTIN_TMIATB:
17019 case ARM_BUILTIN_TMIABT:
17020 case ARM_BUILTIN_TMIABB:
17021 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
17022 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
17023 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
17024 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
17025 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
17026 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
17027 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
17028 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
17029 : CODE_FOR_iwmmxt_walign);
17030 arg0 = CALL_EXPR_ARG (exp, 0);
17031 arg1 = CALL_EXPR_ARG (exp, 1);
17032 arg2 = CALL_EXPR_ARG (exp, 2);
17033 op0 = expand_normal (arg0);
17034 op1 = expand_normal (arg1);
17035 op2 = expand_normal (arg2);
17036 tmode = insn_data[icode].operand[0].mode;
17037 mode0 = insn_data[icode].operand[1].mode;
17038 mode1 = insn_data[icode].operand[2].mode;
17039 mode2 = insn_data[icode].operand[3].mode;
17041 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17042 op0 = copy_to_mode_reg (mode0, op0);
17043 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17044 op1 = copy_to_mode_reg (mode1, op1);
17045 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17046 op2 = copy_to_mode_reg (mode2, op2);
17048 || GET_MODE (target) != tmode
17049 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17050 target = gen_reg_rtx (tmode);
17051 pat = GEN_FCN (icode) (target, op0, op1, op2);
17057 case ARM_BUILTIN_WZERO:
17058 target = gen_reg_rtx (DImode);
17059 emit_insn (gen_iwmmxt_clrdi (target));
17062 case ARM_BUILTIN_THREAD_POINTER:
17063 return arm_load_tp (target);
17069 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17070 if (d->code == (const enum arm_builtins) fcode)
17071 return arm_expand_binop_builtin (d->icode, exp, target);
17073 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17074 if (d->code == (const enum arm_builtins) fcode)
17075 return arm_expand_unop_builtin (d->icode, exp, target, 0);
17077 /* @@@ Should really do something sensible here. */
17081 /* Return the number (counting from 0) of
17082 the least significant set bit in MASK. */
17085 number_of_first_bit_set (unsigned mask)
17090 (mask & (1 << bit)) == 0;
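/* A minimal equivalent sketch of this loop:

     int bit = 0;
     while ((mask & (1u << bit)) == 0)
       bit++;
     return bit;

   The result is undefined when MASK is zero, which callers avoid.  */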
17097 /* Emit code to push or pop registers to or from the stack. F is the
17098 assembly file. MASK is the registers to push or pop. PUSH is
17099 nonzero if we should push, and zero if we should pop. For debugging
17100 output, if pushing, adjust CFA_OFFSET by the amount of space added
17101 to the stack. REAL_REGS should have the same number of bits set as
17102 MASK, and will be used instead (in the same order) to describe which
17103 registers were saved - this is used to mark the save slots when we
17104 push high registers after moving them to low registers. */
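/* An assumed example of the output shape: for MASK = {r0, r1, lr}
   with PUSH nonzero and EABI unwind tables enabled, this emits
   roughly

       .save {r0, r1, lr}
       push {r0, r1, lr}

   and records the saved slots against REAL_REGS in the frame
   debug information.  */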
17106 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
17107 unsigned long real_regs)
17110 int lo_mask = mask & 0xFF;
17111 int pushed_words = 0;
17115 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
17117 /* Special case. Do not generate a POP PC statement here, do it in
17119 thumb_exit (f, -1);
17123 if (ARM_EABI_UNWIND_TABLES && push)
17125 fprintf (f, "\t.save\t{");
17126 for (regno = 0; regno < 15; regno++)
17128 if (real_regs & (1 << regno))
17130 if (real_regs & ((1 << regno) - 1))
17132 asm_fprintf (f, "%r", regno);
17135 fprintf (f, "}\n");
17138 fprintf (f, "\t%s\t{", push ? "push" : "pop");
17140 /* Look at the low registers first. */
17141 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
17145 asm_fprintf (f, "%r", regno);
17147 if ((lo_mask & ~1) != 0)
17154 if (push && (mask & (1 << LR_REGNUM)))
17156 /* Catch pushing the LR. */
17160 asm_fprintf (f, "%r", LR_REGNUM);
17164 else if (!push && (mask & (1 << PC_REGNUM)))
17166 /* Catch popping the PC. */
17167 if (TARGET_INTERWORK || TARGET_BACKTRACE
17168 || crtl->calls_eh_return)
17170 /* The PC is never popped directly; instead
17171 it is popped into r3 and then BX is used. */
17172 fprintf (f, "}\n");
17174 thumb_exit (f, -1);
17183 asm_fprintf (f, "%r", PC_REGNUM);
17187 fprintf (f, "}\n");
17189 if (push && pushed_words && dwarf2out_do_frame ())
17191 char *l = dwarf2out_cfi_label (false);
17192 int pushed_mask = real_regs;
17194 *cfa_offset += pushed_words * 4;
17195 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
17198 pushed_mask = real_regs;
17199 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
17201 if (pushed_mask & 1)
17202 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
17207 /* Generate code to return from a thumb function.
17208 If 'reg_containing_return_addr' is -1, then the return address is
17209 actually on the stack, at the stack pointer. */
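/* In the simple case (no interworking, no backtrace structure, and
   not entered in ARM mode) this reduces to a single "pop {pc}";
   otherwise the return address is popped into a free low register
   and the function returns with "bx" on that register.  */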
17211 thumb_exit (FILE *f, int reg_containing_return_addr)
17213 unsigned regs_available_for_popping;
17214 unsigned regs_to_pop;
17216 unsigned available;
17220 int restore_a4 = FALSE;
17222 /* Compute the registers we need to pop. */
17226 if (reg_containing_return_addr == -1)
17228 regs_to_pop |= 1 << LR_REGNUM;
17232 if (TARGET_BACKTRACE)
17234 /* Restore the (ARM) frame pointer and stack pointer. */
17235 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
17239 /* If there is nothing to pop then just emit the BX instruction and
17241 if (pops_needed == 0)
17243 if (crtl->calls_eh_return)
17244 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17246 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17249 /* Otherwise if we are not supporting interworking and we have not created
17250 a backtrace structure and the function was not entered in ARM mode then
17251 just pop the return address straight into the PC. */
17252 else if (!TARGET_INTERWORK
17253 && !TARGET_BACKTRACE
17254 && !is_called_in_ARM_mode (current_function_decl)
17255 && !crtl->calls_eh_return)
17257 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
17261 /* Find out how many of the (return) argument registers we can corrupt. */
17262 regs_available_for_popping = 0;
17264 /* If returning via __builtin_eh_return, the bottom three registers
17265 all contain information needed for the return. */
17266 if (crtl->calls_eh_return)
17270 /* We can deduce the registers used from the function's
17271 return value. This is more reliable than examining
17272 df_regs_ever_live_p () because that will be set if the register is
17273 ever used in the function, not just if the register is used
17274 to hold a return value. */
17276 if (crtl->return_rtx != 0)
17277 mode = GET_MODE (crtl->return_rtx);
17279 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17281 size = GET_MODE_SIZE (mode);
17285 /* In a void function we can use any argument register.
17286 In a function that returns a structure on the stack
17287 we can use the second and third argument registers. */
17288 if (mode == VOIDmode)
17289 regs_available_for_popping =
17290 (1 << ARG_REGISTER (1))
17291 | (1 << ARG_REGISTER (2))
17292 | (1 << ARG_REGISTER (3));
17294 regs_available_for_popping =
17295 (1 << ARG_REGISTER (2))
17296 | (1 << ARG_REGISTER (3));
17298 else if (size <= 4)
17299 regs_available_for_popping =
17300 (1 << ARG_REGISTER (2))
17301 | (1 << ARG_REGISTER (3));
17302 else if (size <= 8)
17303 regs_available_for_popping =
17304 (1 << ARG_REGISTER (3));
17307 /* Match registers to be popped with registers into which we pop them. */
17308 for (available = regs_available_for_popping,
17309 required = regs_to_pop;
17310 required != 0 && available != 0;
17311 available &= ~(available & - available),
17312 required &= ~(required & - required))
17315 /* If we have any popping registers left over, remove them. */
17317 regs_available_for_popping &= ~available;
17319 /* Otherwise if we need another popping register we can use
17320 the fourth argument register. */
17321 else if (pops_needed)
17323 /* If we have not found any free argument registers and
17324 reg a4 contains the return address, we must move it. */
17325 if (regs_available_for_popping == 0
17326 && reg_containing_return_addr == LAST_ARG_REGNUM)
17328 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17329 reg_containing_return_addr = LR_REGNUM;
17331 else if (size > 12)
17333 /* Register a4 is being used to hold part of the return value,
17334 but we have dire need of a free, low register. */
17337 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
17340 if (reg_containing_return_addr != LAST_ARG_REGNUM)
17342 /* The fourth argument register is available. */
17343 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
17349 /* Pop as many registers as we can. */
17350 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17351 regs_available_for_popping);
17353 /* Process the registers we popped. */
17354 if (reg_containing_return_addr == -1)
17356 /* The return address was popped into the lowest numbered register. */
17357 regs_to_pop &= ~(1 << LR_REGNUM);
17359 reg_containing_return_addr =
17360 number_of_first_bit_set (regs_available_for_popping);
17362 /* Remove this register from the mask of available registers, so that
17363 the return address will not be corrupted by further pops. */
17364 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
17367 /* If we popped other registers then handle them here. */
17368 if (regs_available_for_popping)
17372 /* Work out which register currently contains the frame pointer. */
17373 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
17375 /* Move it into the correct place. */
17376 asm_fprintf (f, "\tmov\t%r, %r\n",
17377 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
17379 /* (Temporarily) remove it from the mask of popped registers. */
17380 regs_available_for_popping &= ~(1 << frame_pointer);
17381 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
17383 if (regs_available_for_popping)
17387 /* We popped the stack pointer as well,
17388 find the register that contains it. */
17389 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
17391 /* Move it into the stack register. */
17392 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
17394 /* At this point we have popped all necessary registers, so
17395 do not worry about restoring regs_available_for_popping
17396 to its correct value:
17398 assert (pops_needed == 0)
17399 assert (regs_available_for_popping == (1 << frame_pointer))
17400 assert (regs_to_pop == (1 << STACK_POINTER)) */
17404 /* Since we have just moved the popped value into the frame
17405 pointer, the popping register is available for reuse, and
17406 we know that we still have the stack pointer left to pop. */
17407 regs_available_for_popping |= (1 << frame_pointer);
17411 /* If we still have registers left on the stack, but we no longer have
17412 any registers into which we can pop them, then we must move the return
17413 address into the link register and make available the register that
17415 if (regs_available_for_popping == 0 && pops_needed > 0)
17417 regs_available_for_popping |= 1 << reg_containing_return_addr;
17419 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
17420 reg_containing_return_addr);
17422 reg_containing_return_addr = LR_REGNUM;
17425 /* If we have registers left on the stack then pop some more.
17426 We know that at most we will want to pop FP and SP. */
17427 if (pops_needed > 0)
17432 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17433 regs_available_for_popping);
17435 /* We have popped either FP or SP.
17436 Move whichever one it is into the correct register. */
17437 popped_into = number_of_first_bit_set (regs_available_for_popping);
17438 move_to = number_of_first_bit_set (regs_to_pop);
17440 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
17442 regs_to_pop &= ~(1 << move_to);
17447 /* If we still have not popped everything then we must have only
17448 had one register available to us and we are now popping the SP. */
17449 if (pops_needed > 0)
17453 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17454 regs_available_for_popping);
17456 popped_into = number_of_first_bit_set (regs_available_for_popping);
17458 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
17460 assert (regs_to_pop == (1 << STACK_POINTER))
17461 assert (pops_needed == 1)
17465 /* If necessary restore the a4 register. */
17468 if (reg_containing_return_addr != LR_REGNUM)
17470 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17471 reg_containing_return_addr = LR_REGNUM;
17474 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
17477 if (crtl->calls_eh_return)
17478 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17480 /* Return to caller. */
17481 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17486 thumb1_final_prescan_insn (rtx insn)
17488 if (flag_print_asm_name)
17489 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
17490 INSN_ADDRESSES (INSN_UID (insn)));
17494 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
17496 unsigned HOST_WIDE_INT mask = 0xff;
17499 if (val == 0) /* XXX */
17502 for (i = 0; i < 25; i++)
17503 if ((val & (mask << i)) == val)
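/* For illustration, a standalone restatement of the test above (a sketch,
   not compiler code; the return convention here is an assumption): the
   predicate accepts exactly those values expressible as an 8-bit constant
   shifted left by 0..24 bits. */
static int
shiftable_const_example (unsigned long val)
{
  unsigned long mask = 0xff;
  int i;

  if (val == 0)
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
/* e.g. 0x1fe00 (0xff << 9) is accepted, while 0x101 spans nine bits
   and is rejected. */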
17509 /* Returns nonzero if the current function contains,
17510 or might contain a far jump. */
17512 thumb_far_jump_used_p (void)
17516 /* This test is only important for leaf functions. */
17517 /* assert (!leaf_function_p ()); */
17519 /* If we have already decided that far jumps may be used,
17520 do not bother checking again, and always return true even if
17521 it turns out that they are not being used. Once we have made
17522 the decision that far jumps are present (and that hence the link
17523 register will be pushed onto the stack) we cannot go back on it. */
17524 if (cfun->machine->far_jump_used)
17527 /* If this function is not being called from the prologue/epilogue
17528 generation code then it must be being called from the
17529 INITIAL_ELIMINATION_OFFSET macro. */
17530 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
17532 /* In this case we know that we are being asked about the elimination
17533 of the arg pointer register. If that register is not being used,
17534 then there are no arguments on the stack, and we do not have to
17535 worry that a far jump might force the prologue to push the link
17536 register, changing the stack offsets. In this case we can just
17537 return false, since the presence of far jumps in the function will
17538 not affect stack offsets.
17540 If the arg pointer is live (or if it was live, but has now been
17541 eliminated and so set to dead) then we do have to test to see if
17542 the function might contain a far jump. This test can lead to some
17543 false positives, since before reload is completed the length of
17544 branch instructions is not known, so gcc defaults to returning their
17545 longest length, which in turn sets the far jump attribute to true.
17547 A false positive will not result in bad code being generated, but it
17548 will result in a needless push and pop of the link register. We
17549 hope that this does not occur too often.
17551 If we need doubleword stack alignment this could affect the other
17552 elimination offsets so we can't risk getting it wrong. */
17553 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
17554 cfun->machine->arg_pointer_live = 1;
17555 else if (!cfun->machine->arg_pointer_live)
17559 /* Check to see if the function contains a branch
17560 insn with the far jump attribute set. */
17561 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17563 if (GET_CODE (insn) == JUMP_INSN
17564 /* Ignore tablejump patterns. */
17565 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17566 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
17567 && get_attr_far_jump (insn) == FAR_JUMP_YES
17570 /* Record the fact that we have decided that
17571 the function does use far jumps. */
17572 cfun->machine->far_jump_used = 1;
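/* A minimal sketch of the "sticky" caching pattern used above; the scan
   callback is a hypothetical stand-in for the insn walk. Once the flag
   is set, later calls return true without re-scanning. */
struct sticky_state { int far_jump_used; };

static int
far_jump_used_example (struct sticky_state *s, int (*scan) (void))
{
  if (s->far_jump_used)
    return 1;			/* Decision already made; never revisit. */
  if (scan ())
    return s->far_jump_used = 1;
  return 0;
}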
17580 /* Return nonzero if FUNC must be entered in ARM mode. */
17582 is_called_in_ARM_mode (tree func)
17584 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
17586 /* Ignore the problem about functions whose address is taken. */
17587 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
17591 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
17597 /* The bits which aren't usefully expanded as rtl. */
17599 thumb_unexpanded_epilogue (void)
17601 arm_stack_offsets *offsets;
17603 unsigned long live_regs_mask = 0;
17604 int high_regs_pushed = 0;
17605 int had_to_push_lr;
17608 if (cfun->machine->return_used_this_function != 0)
17611 if (IS_NAKED (arm_current_func_type ()))
17614 offsets = arm_get_frame_offsets ();
17615 live_regs_mask = offsets->saved_regs_mask;
17616 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17618 /* We can deduce the registers used from the function's return value.
17619 This is more reliable than examining df_regs_ever_live_p () because that
17620 will be set if the register is ever used in the function, not just if
17621 the register is used to hold a return value. */
17622 size = arm_size_return_regs ();
17624 /* The prolog may have pushed some high registers to use as
17625 work registers. e.g. the testsuite file:
17626 gcc/testsuite/gcc.c-torture/execute/complex-2.c
17627 compiles to produce:
17628 push {r4, r5, r6, r7, lr}
17632 as part of the prolog. We have to undo that pushing here. */
17634 if (high_regs_pushed)
17636 unsigned long mask = live_regs_mask & 0xff;
17639 /* The available low registers depend on the size of the value we are returning. */
17647 /* Oh dear! We have no low registers into which we can pop high registers! */
17650 internal_error ("no low registers available for popping high registers");
17652 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
17653 if (live_regs_mask & (1 << next_hi_reg))
17656 while (high_regs_pushed)
17658 /* Find lo register(s) into which the high register(s) can
17660 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17662 if (mask & (1 << regno))
17663 high_regs_pushed--;
17664 if (high_regs_pushed == 0)
17668 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
17670 /* Pop the values into the low register(s). */
17671 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
17673 /* Move the value(s) into the high registers. */
17674 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17676 if (mask & (1 << regno))
17678 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
17681 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
17682 if (live_regs_mask & (1 << next_hi_reg))
17687 live_regs_mask &= ~0x0f00;
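/* Illustration with hypothetical masks: to restore r8 and r9 when r2 and
   r3 are free low registers, the loops above emit roughly

	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3

   popping the saved values into low registers and then transferring them
   to their real homes. */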
17690 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
17691 live_regs_mask &= 0xff;
17693 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
17695 /* Pop the return address into the PC. */
17696 if (had_to_push_lr)
17697 live_regs_mask |= 1 << PC_REGNUM;
17699 /* Either no argument registers were pushed or a backtrace
17700 structure was created which includes an adjusted stack
17701 pointer, so just pop everything. */
17702 if (live_regs_mask)
17703 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17706 /* We have either just popped the return address into the
17707 PC or it was kept in LR for the entire function. */
17708 if (!had_to_push_lr)
17709 thumb_exit (asm_out_file, LR_REGNUM);
17713 /* Pop everything but the return address. */
17714 if (live_regs_mask)
17715 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17718 if (had_to_push_lr)
17722 /* We have no free low regs, so save one. */
17723 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
17727 /* Get the return address into a temporary register. */
17728 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
17729 1 << LAST_ARG_REGNUM);
17733 /* Move the return address to lr. */
17734 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
17736 /* Restore the low register. */
17737 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
17742 regno = LAST_ARG_REGNUM;
17747 /* Remove the argument registers that were pushed onto the stack. */
17748 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
17749 SP_REGNUM, SP_REGNUM,
17750 crtl->args.pretend_args_size);
17752 thumb_exit (asm_out_file, regno);
17758 /* Functions to save and restore machine-specific function data. */
17759 static struct machine_function *
17760 arm_init_machine_status (void)
17762 struct machine_function *machine;
17763 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
17765 #if ARM_FT_UNKNOWN != 0
17766 machine->func_type = ARM_FT_UNKNOWN;
17771 /* Return an RTX indicating where the return address to the
17772 calling function can be found. */
17774 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
17779 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
17782 /* Do anything needed before RTL is emitted for each function. */
17784 arm_init_expanders (void)
17786 /* Arrange to initialize and mark the machine per-function status. */
17787 init_machine_status = arm_init_machine_status;
17789 /* This is to stop the combine pass optimizing away the alignment
17790 adjustment of va_arg. */
17791 /* ??? It is claimed that this should not be necessary. */
17793 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
17797 /* Like arm_compute_initial_elimination_offset. Simpler because there
17798 isn't an ABI specified frame pointer for Thumb. Instead, we set it
17799 to point at the base of the local variables after static stack
17800 space for a function has been allocated. */
17803 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17805 arm_stack_offsets *offsets;
17807 offsets = arm_get_frame_offsets ();
17811 case ARG_POINTER_REGNUM:
17814 case STACK_POINTER_REGNUM:
17815 return offsets->outgoing_args - offsets->saved_args;
17817 case FRAME_POINTER_REGNUM:
17818 return offsets->soft_frame - offsets->saved_args;
17820 case ARM_HARD_FRAME_POINTER_REGNUM:
17821 return offsets->saved_regs - offsets->saved_args;
17823 case THUMB_HARD_FRAME_POINTER_REGNUM:
17824 return offsets->locals_base - offsets->saved_args;
17827 gcc_unreachable ();
17831 case FRAME_POINTER_REGNUM:
17834 case STACK_POINTER_REGNUM:
17835 return offsets->outgoing_args - offsets->soft_frame;
17837 case ARM_HARD_FRAME_POINTER_REGNUM:
17838 return offsets->saved_regs - offsets->soft_frame;
17840 case THUMB_HARD_FRAME_POINTER_REGNUM:
17841 return offsets->locals_base - offsets->soft_frame;
17844 gcc_unreachable ();
17849 gcc_unreachable ();
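/* A worked example of the offset arithmetic above, using a made-up frame
   layout (all byte values hypothetical). */
#include <assert.h>

static void
elimination_offset_example (void)
{
  int saved_args = 0, saved_regs = 8, soft_frame = 16, outgoing_args = 32;

  assert (outgoing_args - saved_args == 32);	/* ARG_POINTER -> SP */
  assert (soft_frame - saved_args == 16);	/* ARG_POINTER -> FP */
  assert (outgoing_args - soft_frame == 16);	/* FRAME_POINTER -> SP */
  (void) saved_regs;
}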
17853 /* Generate the rest of a function's prologue. */
17855 thumb1_expand_prologue (void)
17859 HOST_WIDE_INT amount;
17860 arm_stack_offsets *offsets;
17861 unsigned long func_type;
17863 unsigned long live_regs_mask;
17865 func_type = arm_current_func_type ();
17867 /* Naked functions don't have prologues. */
17868 if (IS_NAKED (func_type))
17871 if (IS_INTERRUPT (func_type))
17873 error ("interrupt Service Routines cannot be coded in Thumb mode");
17877 offsets = arm_get_frame_offsets ();
17878 live_regs_mask = offsets->saved_regs_mask;
17879 /* Load the pic register before setting the frame pointer,
17880 so we can use r7 as a temporary work register. */
17881 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17882 arm_load_pic_register (live_regs_mask);
17884 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17885 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17886 stack_pointer_rtx);
17888 amount = offsets->outgoing_args - offsets->saved_regs;
17893 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17894 GEN_INT (- amount)));
17895 RTX_FRAME_RELATED_P (insn) = 1;
17901 /* The stack decrement is too big for an immediate value in a single
17902 insn. In theory we could issue multiple subtracts, but after
17903 three of them it becomes more space efficient to place the full
17904 value in the constant pool and load into a register. (Also the
17905 ARM debugger really likes to see only one stack decrement per
17906 function). So instead we look for a scratch register into which
17907 we can load the decrement, and then we subtract this from the
17908 stack pointer. Unfortunately on the thumb the only available
17909 scratch registers are the argument registers, and we cannot use
17910 these as they may hold arguments to the function. Instead we
17911 attempt to locate a call preserved register which is used by this
17912 function. If we can find one, then we know that it will have
17913 been pushed at the start of the prologue and so we can corrupt it now. */
17915 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17916 if (live_regs_mask & (1 << regno))
17919 gcc_assert(regno <= LAST_LO_REGNUM);
17921 reg = gen_rtx_REG (SImode, regno);
17923 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17925 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17926 stack_pointer_rtx, reg));
17927 RTX_FRAME_RELATED_P (insn) = 1;
17928 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17929 plus_constant (stack_pointer_rtx,
17931 RTX_FRAME_RELATED_P (dwarf) = 1;
17932 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
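/* For illustration, a hypothetical 1280-byte frame with r4 live would be
   handled roughly as

	ldr	r4, .Lpool	@ r4 = -1280, from the constant pool
	add	sp, sp, r4	@ the one visible stack decrement

   r4 is then restored by the normal epilogue pops, since it was pushed
   at the start of the prologue. */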
17936 if (frame_pointer_needed)
17937 thumb_set_frame_pointer (offsets);
17939 /* If we are profiling, make sure no instructions are scheduled before
17940 the call to mcount. Similarly if the user has requested no
17941 scheduling in the prolog. Similarly if we want non-call exceptions
17942 using the EABI unwinder, to prevent faulting instructions from being
17943 swapped with a stack adjustment. */
17944 if (crtl->profile || !TARGET_SCHED_PROLOG
17945 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17946 emit_insn (gen_blockage ());
17948 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17949 if (live_regs_mask & 0xff)
17950 cfun->machine->lr_save_eliminated = 0;
17955 thumb1_expand_epilogue (void)
17957 HOST_WIDE_INT amount;
17958 arm_stack_offsets *offsets;
17961 /* Naked functions don't have epilogues. */
17962 if (IS_NAKED (arm_current_func_type ()))
17965 offsets = arm_get_frame_offsets ();
17966 amount = offsets->outgoing_args - offsets->saved_regs;
17968 if (frame_pointer_needed)
17970 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17971 amount = offsets->locals_base - offsets->saved_regs;
17974 gcc_assert (amount >= 0);
17978 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17979 GEN_INT (amount)));
17982 /* r3 is always free in the epilogue. */
17983 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17985 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17986 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17990 /* Emit a USE (stack_pointer_rtx), so that
17991 the stack adjustment will not be deleted. */
17992 emit_insn (gen_prologue_use (stack_pointer_rtx));
17994 if (crtl->profile || !TARGET_SCHED_PROLOG)
17995 emit_insn (gen_blockage ());
17997 /* Emit a clobber for each insn that will be restored in the epilogue,
17998 so that flow2 will get register lifetimes correct. */
17999 for (regno = 0; regno < 13; regno++)
18000 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
18001 emit_clobber (gen_rtx_REG (SImode, regno));
18003 if (! df_regs_ever_live_p (LR_REGNUM))
18004 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
18008 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
18010 arm_stack_offsets *offsets;
18011 unsigned long live_regs_mask = 0;
18012 unsigned long l_mask;
18013 unsigned high_regs_pushed = 0;
18014 int cfa_offset = 0;
18017 if (IS_NAKED (arm_current_func_type ()))
18020 if (is_called_in_ARM_mode (current_function_decl))
18024 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
18025 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
18027 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
18029 /* Generate code sequence to switch us into Thumb mode. */
18030 /* The .code 32 directive has already been emitted by
18031 ASM_DECLARE_FUNCTION_NAME. */
18032 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
18033 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
18035 /* Generate a label, so that the debugger will notice the
18036 change in instruction sets. This label is also used by
18037 the assembler to bypass the ARM code when this function
18038 is called from a Thumb encoded function elsewhere in the
18039 same file. Hence the definition of STUB_NAME here must
18040 agree with the definition in gas/config/tc-arm.c. */
18042 #define STUB_NAME ".real_start_of"
18044 fprintf (f, "\t.code\t16\n");
18046 if (arm_dllexport_name_p (name))
18047 name = arm_strip_name_encoding (name);
18049 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
18050 fprintf (f, "\t.thumb_func\n");
18051 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
18054 if (crtl->args.pretend_args_size)
18056 /* Output unwind directive for the stack adjustment. */
18057 if (ARM_EABI_UNWIND_TABLES)
18058 fprintf (f, "\t.pad #%d\n",
18059 crtl->args.pretend_args_size);
18061 if (cfun->machine->uses_anonymous_args)
18065 fprintf (f, "\tpush\t{");
18067 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
18069 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
18070 regno <= LAST_ARG_REGNUM;
18072 asm_fprintf (f, "%r%s", regno,
18073 regno == LAST_ARG_REGNUM ? "" : ", ");
18075 fprintf (f, "}\n");
18078 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
18079 SP_REGNUM, SP_REGNUM,
18080 crtl->args.pretend_args_size);
18082 /* We don't need to record the stores for unwinding (would it
18083 help the debugger any if we did?), but record the change in
18084 the stack pointer. */
18085 if (dwarf2out_do_frame ())
18087 char *l = dwarf2out_cfi_label (false);
18089 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
18090 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
18094 /* Get the registers we are going to push. */
18095 offsets = arm_get_frame_offsets ();
18096 live_regs_mask = offsets->saved_regs_mask;
18097 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
18098 l_mask = live_regs_mask & 0x40ff;
18099 /* Then count how many other high registers will need to be pushed. */
18100 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18102 if (TARGET_BACKTRACE)
18105 unsigned work_register;
18107 /* We have been asked to create a stack backtrace structure.
18108 The code looks like this:
18112 0 sub SP, #16 Reserve space for 4 registers.
18113 2 push {R7} Push low registers.
18114 4 add R7, SP, #20 Get the stack pointer before the push.
18115 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
18116 8 mov R7, PC Get hold of the start of this code plus 12.
18117 10 str R7, [SP, #16] Store it.
18118 12 mov R7, FP Get hold of the current frame pointer.
18119 14 str R7, [SP, #4] Store it.
18120 16 mov R7, LR Get hold of the current return address.
18121 18 str R7, [SP, #12] Store it.
18122 20 add R7, SP, #16 Point at the start of the backtrace structure.
18123 22 mov FP, R7 Put this value into the frame pointer. */
18125 work_register = thumb_find_work_register (live_regs_mask);
18127 if (ARM_EABI_UNWIND_TABLES)
18128 asm_fprintf (f, "\t.pad #16\n");
18131 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
18132 SP_REGNUM, SP_REGNUM);
18134 if (dwarf2out_do_frame ())
18136 char *l = dwarf2out_cfi_label (false);
18138 cfa_offset = cfa_offset + 16;
18139 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
18144 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
18145 offset = bit_count (l_mask) * UNITS_PER_WORD;
18150 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
18151 offset + 16 + crtl->args.pretend_args_size);
18153 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18156 /* Make sure that the instruction fetching the PC is in the right place
18157 to calculate "start of backtrace creation code + 12". */
18160 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
18161 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18163 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
18164 ARM_HARD_FRAME_POINTER_REGNUM);
18165 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18170 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
18171 ARM_HARD_FRAME_POINTER_REGNUM);
18172 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18174 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
18175 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18179 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
18180 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18182 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
18184 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
18185 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
18187 /* Optimization: If we are not pushing any low registers but we are going
18188 to push some high registers then delay our first push. This will just
18189 be a push of LR and we can combine it with the push of the first high register. */
18191 else if ((l_mask & 0xff) != 0
18192 || (high_regs_pushed == 0 && l_mask))
18193 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
18195 if (high_regs_pushed)
18197 unsigned pushable_regs;
18198 unsigned next_hi_reg;
18200 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
18201 if (live_regs_mask & (1 << next_hi_reg))
18204 pushable_regs = l_mask & 0xff;
18206 if (pushable_regs == 0)
18207 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
18209 while (high_regs_pushed > 0)
18211 unsigned long real_regs_mask = 0;
18213 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
18215 if (pushable_regs & (1 << regno))
18217 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
18219 high_regs_pushed --;
18220 real_regs_mask |= (1 << next_hi_reg);
18222 if (high_regs_pushed)
18224 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
18226 if (live_regs_mask & (1 << next_hi_reg))
18231 pushable_regs &= ~((1 << regno) - 1);
18237 /* If we had to find a work register and we have not yet
18238 saved the LR then add it to the list of regs to push. */
18239 if (l_mask == (1 << LR_REGNUM))
18241 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
18243 real_regs_mask | (1 << LR_REGNUM));
18247 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
18252 /* Handle the case of a double word load into a low register from
18253 a computed memory address. The computed address may involve a
18254 register which is overwritten by the load. */
18256 thumb_load_double_from_address (rtx *operands)
18264 gcc_assert (GET_CODE (operands[0]) == REG);
18265 gcc_assert (GET_CODE (operands[1]) == MEM);
18267 /* Get the memory address. */
18268 addr = XEXP (operands[1], 0);
18270 /* Work out how the memory address is computed. */
18271 switch (GET_CODE (addr))
18274 operands[2] = adjust_address (operands[1], SImode, 4);
18276 if (REGNO (operands[0]) == REGNO (addr))
18278 output_asm_insn ("ldr\t%H0, %2", operands);
18279 output_asm_insn ("ldr\t%0, %1", operands);
18283 output_asm_insn ("ldr\t%0, %1", operands);
18284 output_asm_insn ("ldr\t%H0, %2", operands);
18289 /* Compute <address> + 4 for the high order load. */
18290 operands[2] = adjust_address (operands[1], SImode, 4);
18292 output_asm_insn ("ldr\t%0, %1", operands);
18293 output_asm_insn ("ldr\t%H0, %2", operands);
18297 arg1 = XEXP (addr, 0);
18298 arg2 = XEXP (addr, 1);
18300 if (CONSTANT_P (arg1))
18301 base = arg2, offset = arg1;
18303 base = arg1, offset = arg2;
18305 gcc_assert (GET_CODE (base) == REG);
18307 /* Catch the case of <address> = <reg> + <reg> */
18308 if (GET_CODE (offset) == REG)
18310 int reg_offset = REGNO (offset);
18311 int reg_base = REGNO (base);
18312 int reg_dest = REGNO (operands[0]);
18314 /* Add the base and offset registers together into the
18315 higher destination register. */
18316 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
18317 reg_dest + 1, reg_base, reg_offset);
18319 /* Load the lower destination register from the address in
18320 the higher destination register. */
18321 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
18322 reg_dest, reg_dest + 1);
18324 /* Load the higher destination register from its own address plus 4. */
18326 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
18327 reg_dest + 1, reg_dest + 1);
18331 /* Compute <address> + 4 for the high order load. */
18332 operands[2] = adjust_address (operands[1], SImode, 4);
18334 /* If the computed address is held in the low order register
18335 then load the high order register first, otherwise always
18336 load the low order register first. */
18337 if (REGNO (operands[0]) == REGNO (base))
18339 output_asm_insn ("ldr\t%H0, %2", operands);
18340 output_asm_insn ("ldr\t%0, %1", operands);
18344 output_asm_insn ("ldr\t%0, %1", operands);
18345 output_asm_insn ("ldr\t%H0, %2", operands);
18351 /* With no registers to worry about we can just load the value directly. */
18353 operands[2] = adjust_address (operands[1], SImode, 4);
18355 output_asm_insn ("ldr\t%H0, %2", operands);
18356 output_asm_insn ("ldr\t%0, %1", operands);
18360 gcc_unreachable ();
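/* Illustration of the hazard handled above: loading a double word into
   r0/r1 from the address held in r0 must fetch the high word first,
   because

	ldr	r0, [r0, #0]	@ would destroy the base address
	ldr	r1, [r0, #4]	@ and then read from the wrong place

   whereas high-word-first ordering leaves the base intact until it is no
   longer needed. */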
18367 thumb_output_move_mem_multiple (int n, rtx *operands)
18374 if (REGNO (operands[4]) > REGNO (operands[5]))
18377 operands[4] = operands[5];
18380 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
18381 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
18385 if (REGNO (operands[4]) > REGNO (operands[5]))
18388 operands[4] = operands[5];
18391 if (REGNO (operands[5]) > REGNO (operands[6]))
18394 operands[5] = operands[6];
18397 if (REGNO (operands[4]) > REGNO (operands[5]))
18400 operands[4] = operands[5];
18404 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
18405 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
18409 gcc_unreachable ();
18415 /* Output a call-via instruction for thumb state. */
18417 thumb_call_via_reg (rtx reg)
18419 int regno = REGNO (reg);
18422 gcc_assert (regno < LR_REGNUM);
18424 /* If we are in the normal text section we can use a single instance
18425 per compilation unit. If we are doing function sections, then we need
18426 an entry per section, since we can't rely on reachability. */
18427 if (in_section == text_section)
18429 thumb_call_reg_needed = 1;
18431 if (thumb_call_via_label[regno] == NULL)
18432 thumb_call_via_label[regno] = gen_label_rtx ();
18433 labelp = thumb_call_via_label + regno;
18437 if (cfun->machine->call_via[regno] == NULL)
18438 cfun->machine->call_via[regno] = gen_label_rtx ();
18439 labelp = cfun->machine->call_via + regno;
18442 output_asm_insn ("bl\t%a0", labelp);
18446 /* Routines for generating rtl. */
18448 thumb_expand_movmemqi (rtx *operands)
18450 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
18451 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
18452 HOST_WIDE_INT len = INTVAL (operands[2]);
18453 HOST_WIDE_INT offset = 0;
18457 emit_insn (gen_movmem12b (out, in, out, in));
18463 emit_insn (gen_movmem8b (out, in, out, in));
18469 rtx reg = gen_reg_rtx (SImode);
18470 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
18471 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
18478 rtx reg = gen_reg_rtx (HImode);
18479 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
18480 plus_constant (in, offset))));
18481 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
18489 rtx reg = gen_reg_rtx (QImode);
18490 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
18491 plus_constant (in, offset))));
18492 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
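/* Illustrative decomposition: a 27-byte copy is expanded by the code
   above as 12 + 12 + 2 + 1, i.e. two three-register ldmia/stmia pairs,
   then one half-word and one byte move. */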
18498 thumb_reload_out_hi (rtx *operands)
18500 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
18503 /* Handle reading a half-word from memory during reload. */
18505 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
18507 gcc_unreachable ();
18510 /* Return the length of a function name prefix
18511 that starts with the character 'c'. */
18513 arm_get_strip_length (int c)
18517 ARM_NAME_ENCODING_LENGTHS
18522 /* Return a pointer to a function's name with any
18523 and all prefix encodings stripped from it. */
18525 arm_strip_name_encoding (const char *name)
18529 while ((skip = arm_get_strip_length (* name)))
18535 /* If there is a '*' anywhere in the name's prefix, then
18536 emit the stripped name verbatim, otherwise prepend an
18537 underscore if leading underscores are being used. */
18539 arm_asm_output_labelref (FILE *stream, const char *name)
18544 while ((skip = arm_get_strip_length (* name)))
18546 verbatim |= (*name == '*');
18551 fputs (name, stream);
18553 asm_fprintf (stream, "%U%s", name);
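/* A standalone sketch of the stripping loop above, with a hypothetical
   one-character prefix set standing in for ARM_NAME_ENCODING_LENGTHS. */
static int
strip_length_example (int c)
{
  return (c == '*' || c == '.') ? 1 : 0;	/* assumed prefixes */
}

static const char *
strip_example (const char *name)
{
  int skip;

  while ((skip = strip_length_example (*name)))
    name += skip;
  return name;				/* "*.foo" becomes "foo" */
}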
18557 arm_file_start (void)
18561 if (TARGET_UNIFIED_ASM)
18562 asm_fprintf (asm_out_file, "\t.syntax unified\n");
18566 const char *fpu_name;
18567 if (arm_select[0].string)
18568 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
18569 else if (arm_select[1].string)
18570 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
18572 asm_fprintf (asm_out_file, "\t.cpu %s\n",
18573 all_cores[arm_default_cpu].name);
18575 if (TARGET_SOFT_FLOAT)
18578 fpu_name = "softvfp";
18580 fpu_name = "softfpa";
18584 int set_float_abi_attributes = 0;
18585 switch (arm_fpu_arch)
18590 case FPUTYPE_FPA_EMU2:
18593 case FPUTYPE_FPA_EMU3:
18596 case FPUTYPE_MAVERICK:
18597 fpu_name = "maverick";
18601 set_float_abi_attributes = 1;
18603 case FPUTYPE_VFP3D16:
18604 fpu_name = "vfpv3-d16";
18605 set_float_abi_attributes = 1;
18608 fpu_name = "vfpv3";
18609 set_float_abi_attributes = 1;
18613 set_float_abi_attributes = 1;
18615 case FPUTYPE_NEON_FP16:
18616 fpu_name = "neon-fp16";
18617 set_float_abi_attributes = 1;
18622 if (set_float_abi_attributes)
18624 if (TARGET_HARD_FLOAT)
18625 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
18626 if (TARGET_HARD_FLOAT_ABI)
18627 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
18630 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
18632 /* Some of these attributes only apply when the corresponding features
18633 are used. However we don't have any easy way of figuring this out.
18634 Conservatively record the setting that would have been used. */
18636 /* Tag_ABI_FP_rounding. */
18637 if (flag_rounding_math)
18638 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
18639 if (!flag_unsafe_math_optimizations)
18641 /* Tag_ABI_FP_denormal. */
18642 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
18643 /* Tag_ABI_FP_exceptions. */
18644 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
18646 /* Tag_ABI_FP_user_exceptions. */
18647 if (flag_signaling_nans)
18648 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
18649 /* Tag_ABI_FP_number_model. */
18650 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
18651 flag_finite_math_only ? 1 : 3);
18653 /* Tag_ABI_align8_needed. */
18654 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
18655 /* Tag_ABI_align8_preserved. */
18656 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
18657 /* Tag_ABI_enum_size. */
18658 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
18659 flag_short_enums ? 1 : 2);
18661 /* Tag_ABI_optimization_goals. */
18664 else if (optimize >= 2)
18670 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
18672 /* Tag_ABI_FP_16bit_format. */
18673 if (arm_fp16_format)
18674 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
18675 (int)arm_fp16_format);
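/* By way of example, a hypothetical compile with -fshort-enums and
   default FP options would get directives along the lines of

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal
	.eabi_attribute 21, 1	@ Tag_ABI_FP_exceptions
	.eabi_attribute 23, 3	@ Tag_ABI_FP_number_model
	.eabi_attribute 24, 1	@ Tag_ABI_align8_needed
	.eabi_attribute 25, 1	@ Tag_ABI_align8_preserved
	.eabi_attribute 26, 1	@ Tag_ABI_enum_size (short enums)  */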
18677 if (arm_lang_output_object_attributes_hook)
18678 arm_lang_output_object_attributes_hook();
18680 default_file_start();
18684 arm_file_end (void)
18688 if (NEED_INDICATE_EXEC_STACK)
18689 /* Add .note.GNU-stack. */
18690 file_end_indicate_exec_stack ();
18692 if (! thumb_call_reg_needed)
18695 switch_to_section (text_section);
18696 asm_fprintf (asm_out_file, "\t.code 16\n");
18697 ASM_OUTPUT_ALIGN (asm_out_file, 1);
18699 for (regno = 0; regno < LR_REGNUM; regno++)
18701 rtx label = thumb_call_via_label[regno];
18705 targetm.asm_out.internal_label (asm_out_file, "L",
18706 CODE_LABEL_NUMBER (label));
18707 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18713 /* Symbols in the text segment can be accessed without indirecting via the
18714 constant pool; it may take an extra binary operation, but this is still
18715 faster than indirecting via memory. Don't do this when not optimizing,
18716 since we won't be calculating all of the offsets necessary to do this
18720 arm_encode_section_info (tree decl, rtx rtl, int first)
18722 if (optimize > 0 && TREE_CONSTANT (decl))
18723 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
18725 default_encode_section_info (decl, rtl, first);
18727 #endif /* !ARM_PE */
18730 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
18732 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
18733 && !strcmp (prefix, "L"))
18735 arm_ccfsm_state = 0;
18736 arm_target_insn = NULL;
18738 default_internal_label (stream, prefix, labelno);
18741 /* Output code to add DELTA to the first argument, and then jump
18742 to FUNCTION. Used for C++ multiple inheritance. */
18744 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
18745 HOST_WIDE_INT delta,
18746 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
18749 static int thunk_label = 0;
18752 int mi_delta = delta;
18753 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
18755 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
18758 mi_delta = - mi_delta;
18762 int labelno = thunk_label++;
18763 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
18764 /* Thunks are entered in ARM mode when available. */
18765 if (TARGET_THUMB1_ONLY)
18767 /* push r3 so we can use it as a temporary. */
18768 /* TODO: Omit this save if r3 is not used. */
18769 fputs ("\tpush {r3}\n", file);
18770 fputs ("\tldr\tr3, ", file);
18774 fputs ("\tldr\tr12, ", file);
18776 assemble_name (file, label);
18777 fputc ('\n', file);
18780 /* If we are generating PIC, the ldr instruction below loads
18781 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
18782 the address of the add + 8, so we have:
18784 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8) = target + 1.
18787 Note that we have "+ 1" because some versions of GNU ld
18788 don't set the low bit of the result for R_ARM_REL32
18789 relocations against thumb function symbols.
18790 On ARMv6M this is +4, not +8. */
18791 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
18792 assemble_name (file, labelpc);
18793 fputs (":\n", file);
18794 if (TARGET_THUMB1_ONLY)
18796 /* This is 2 insns after the start of the thunk, so we know it
18797 is 4-byte aligned. */
18798 fputs ("\tadd\tr3, pc, r3\n", file);
18799 fputs ("\tmov r12, r3\n", file);
18802 fputs ("\tadd\tr12, pc, r12\n", file);
18804 else if (TARGET_THUMB1_ONLY)
18805 fputs ("\tmov r12, r3\n", file);
18807 if (TARGET_THUMB1_ONLY)
18809 if (mi_delta > 255)
18811 fputs ("\tldr\tr3, ", file);
18812 assemble_name (file, label);
18813 fputs ("+4\n", file);
18814 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
18815 mi_op, this_regno, this_regno);
18817 else if (mi_delta != 0)
18819 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18820 mi_op, this_regno, this_regno,
18826 /* TODO: Use movw/movt for large constants when available. */
18827 while (mi_delta != 0)
18829 if ((mi_delta & (3 << shift)) == 0)
18833 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18834 mi_op, this_regno, this_regno,
18835 mi_delta & (0xff << shift));
18836 mi_delta &= ~(0xff << shift);
18843 if (TARGET_THUMB1_ONLY)
18844 fputs ("\tpop\t{r3}\n", file);
18846 fprintf (file, "\tbx\tr12\n");
18847 ASM_OUTPUT_ALIGN (file, 2);
18848 assemble_name (file, label);
18849 fputs (":\n", file);
18852 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
18853 rtx tem = XEXP (DECL_RTL (function), 0);
18854 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
18855 tem = gen_rtx_MINUS (GET_MODE (tem),
18857 gen_rtx_SYMBOL_REF (Pmode,
18858 ggc_strdup (labelpc)));
18859 assemble_integer (tem, 4, BITS_PER_WORD, 1);
18862 /* Output ".word .LTHUNKn". */
18863 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
18865 if (TARGET_THUMB1_ONLY && mi_delta > 255)
18866 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
18870 fputs ("\tb\t", file);
18871 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
18872 if (NEED_PLT_RELOC)
18873 fputs ("(PLT)", file);
18874 fputc ('\n', file);
18879 arm_emit_vector_const (FILE *file, rtx x)
18882 const char * pattern;
18884 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18886 switch (GET_MODE (x))
18888 case V2SImode: pattern = "%08x"; break;
18889 case V4HImode: pattern = "%04x"; break;
18890 case V8QImode: pattern = "%02x"; break;
18891 default: gcc_unreachable ();
18894 fprintf (file, "0x");
18895 for (i = CONST_VECTOR_NUNITS (x); i--;)
18899 element = CONST_VECTOR_ELT (x, i);
18900 fprintf (file, pattern, INTVAL (element));
18906 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
18907 HFmode constant pool entries are actually loaded with ldr. */
18909 arm_emit_fp16_const (rtx c)
18914 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
18915 bits = real_to_target (NULL, &r, HFmode);
18916 if (WORDS_BIG_ENDIAN)
18917 assemble_zeros (2);
18918 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
18919 if (!WORDS_BIG_ENDIAN)
18920 assemble_zeros (2);
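/* Example: the HFmode constant 1.0 has the half-precision bit pattern
   0x3c00, so on a little-endian target this emits the half-word 0x3c00
   followed by two bytes of zero padding; the entry then occupies a full
   word and can be fetched with a plain ldr. */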
18924 arm_output_load_gr (rtx *operands)
18931 if (GET_CODE (operands [1]) != MEM
18932 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18933 || GET_CODE (reg = XEXP (sum, 0)) != REG
18934 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18935 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18936 return "wldrw%?\t%0, %1";
18938 /* Fix up an out-of-range load of a GR register. */
18939 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18940 wcgr = operands[0];
18942 output_asm_insn ("ldr%?\t%0, %1", operands);
18944 operands[0] = wcgr;
18946 output_asm_insn ("tmcr%?\t%0, %1", operands);
18947 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18952 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18954 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18955 named arg and all anonymous args onto the stack.
18956 XXX I know the prologue shouldn't be pushing registers, but it is faster that way. */
18960 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18961 enum machine_mode mode,
18964 int second_time ATTRIBUTE_UNUSED)
18966 int nregs = cum->nregs;
18968 && ARM_DOUBLEWORD_ALIGN
18969 && arm_needs_doubleword_align (mode, type))
18972 cfun->machine->uses_anonymous_args = 1;
18973 if (nregs < NUM_ARG_REGS)
18974 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
18977 /* Return nonzero if the CONSUMER instruction (a store) does not need
18978 PRODUCER's value to calculate the address. */
18981 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18983 rtx value = PATTERN (producer);
18984 rtx addr = PATTERN (consumer);
18986 if (GET_CODE (value) == COND_EXEC)
18987 value = COND_EXEC_CODE (value);
18988 if (GET_CODE (value) == PARALLEL)
18989 value = XVECEXP (value, 0, 0);
18990 value = XEXP (value, 0);
18991 if (GET_CODE (addr) == COND_EXEC)
18992 addr = COND_EXEC_CODE (addr);
18993 if (GET_CODE (addr) == PARALLEL)
18994 addr = XVECEXP (addr, 0, 0);
18995 addr = XEXP (addr, 0);
18997 return !reg_overlap_mentioned_p (value, addr);
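/* Illustration of the test above:

	producer:  (set (reg r1) (...))
	consumer:  (set (mem (plus (reg r2) (const_int 4))) (reg r1))

   r1 feeds only the stored value, not the address, so the function
   returns nonzero and the store's address calculation need not wait
   for r1. */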
19000 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
19001 have an early register shift value or amount dependency on the
19002 result of PRODUCER. */
19005 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
19007 rtx value = PATTERN (producer);
19008 rtx op = PATTERN (consumer);
19011 if (GET_CODE (value) == COND_EXEC)
19012 value = COND_EXEC_CODE (value);
19013 if (GET_CODE (value) == PARALLEL)
19014 value = XVECEXP (value, 0, 0);
19015 value = XEXP (value, 0);
19016 if (GET_CODE (op) == COND_EXEC)
19017 op = COND_EXEC_CODE (op);
19018 if (GET_CODE (op) == PARALLEL)
19019 op = XVECEXP (op, 0, 0);
19022 early_op = XEXP (op, 0);
19023 /* This is either an actual independent shift, or a shift applied to
19024 the first operand of another operation. We want the whole shift operation. */
19026 if (GET_CODE (early_op) == REG)
19029 return !reg_overlap_mentioned_p (value, early_op);
19032 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
19033 have an early register shift value dependency on the result of PRODUCER. */
19037 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
19039 rtx value = PATTERN (producer);
19040 rtx op = PATTERN (consumer);
19043 if (GET_CODE (value) == COND_EXEC)
19044 value = COND_EXEC_CODE (value);
19045 if (GET_CODE (value) == PARALLEL)
19046 value = XVECEXP (value, 0, 0);
19047 value = XEXP (value, 0);
19048 if (GET_CODE (op) == COND_EXEC)
19049 op = COND_EXEC_CODE (op);
19050 if (GET_CODE (op) == PARALLEL)
19051 op = XVECEXP (op, 0, 0);
19054 early_op = XEXP (op, 0);
19056 /* This is either an actual independent shift, or a shift applied to
19057 the first operand of another operation. We want the value being
19058 shifted, in either case. */
19059 if (GET_CODE (early_op) != REG)
19060 early_op = XEXP (early_op, 0);
19062 return !reg_overlap_mentioned_p (value, early_op);
19065 /* Return nonzero if the CONSUMER (a mul or mac op) does not
19066 have an early register mult dependency on the result of PRODUCER. */
19070 arm_no_early_mul_dep (rtx producer, rtx consumer)
19072 rtx value = PATTERN (producer);
19073 rtx op = PATTERN (consumer);
19075 if (GET_CODE (value) == COND_EXEC)
19076 value = COND_EXEC_CODE (value);
19077 if (GET_CODE (value) == PARALLEL)
19078 value = XVECEXP (value, 0, 0);
19079 value = XEXP (value, 0);
19080 if (GET_CODE (op) == COND_EXEC)
19081 op = COND_EXEC_CODE (op);
19082 if (GET_CODE (op) == PARALLEL)
19083 op = XVECEXP (op, 0, 0);
19086 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
19088 if (GET_CODE (XEXP (op, 0)) == MULT)
19089 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
19091 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
19097 /* We can't rely on the caller doing the proper promotion when
19098 using APCS or ATPCS. */
19101 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
19103 return !TARGET_AAPCS_BASED;
19107 /* AAPCS based ABIs use short enums by default. */
19110 arm_default_short_enums (void)
19112 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
19116 /* AAPCS requires that anonymous bitfields affect structure alignment. */
19119 arm_align_anon_bitfield (void)
19121 return TARGET_AAPCS_BASED;
19125 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
19128 arm_cxx_guard_type (void)
19130 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
19133 /* Return non-zero if the consumer (a multiply-accumulate instruction)
19134 has an accumulator dependency on the result of the producer (a
19135 multiplication instruction) and no other dependency on that result. */
19137 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
19139 rtx mul = PATTERN (producer);
19140 rtx mac = PATTERN (consumer);
19142 rtx mac_op0, mac_op1, mac_acc;
19144 if (GET_CODE (mul) == COND_EXEC)
19145 mul = COND_EXEC_CODE (mul);
19146 if (GET_CODE (mac) == COND_EXEC)
19147 mac = COND_EXEC_CODE (mac);
19149 /* Check that mul is of the form (set (...) (mult ...))
19150 and mla is of the form (set (...) (plus (mult ...) (...))). */
19151 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
19152 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
19153 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
19156 mul_result = XEXP (mul, 0);
19157 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
19158 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
19159 mac_acc = XEXP (XEXP (mac, 1), 1);
19161 return (reg_overlap_mentioned_p (mul_result, mac_acc)
19162 && !reg_overlap_mentioned_p (mul_result, mac_op0)
19163 && !reg_overlap_mentioned_p (mul_result, mac_op1));
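/* Concrete (illustrative) instance:

	mul	r1, r2, r3
	mla	r4, r5, r6, r1

   The mul result r1 appears only as the accumulator operand of the mla,
   so this returns nonzero; for "mla r4, r1, r6, r1" it returns zero,
   since r1 is also a multiplicand. */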
19167 /* The EABI says test the least significant bit of a guard variable. */
19170 arm_cxx_guard_mask_bit (void)
19172 return TARGET_AAPCS_BASED;
19176 /* The EABI specifies that all array cookies are 8 bytes long. */
19179 arm_get_cookie_size (tree type)
19183 if (!TARGET_AAPCS_BASED)
19184 return default_cxx_get_cookie_size (type);
19186 size = build_int_cst (sizetype, 8);
19191 /* The EABI says that array cookies should also contain the element size. */
19194 arm_cookie_has_size (void)
19196 return TARGET_AAPCS_BASED;
19200 /* The EABI says constructors and destructors should return a pointer to
19201 the object constructed/destroyed. */
19204 arm_cxx_cdtor_returns_this (void)
19206 return TARGET_AAPCS_BASED;
19209 /* The EABI says that an inline function may never be the key method. */
19213 arm_cxx_key_method_may_be_inline (void)
19215 return !TARGET_AAPCS_BASED;
19219 arm_cxx_determine_class_data_visibility (tree decl)
19221 if (!TARGET_AAPCS_BASED
19222 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
19225 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
19226 is exported. However, on systems without dynamic vague linkage,
19227 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
19228 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
19229 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
19231 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
19232 DECL_VISIBILITY_SPECIFIED (decl) = 1;
19236 arm_cxx_class_data_always_comdat (void)
19238 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
19239 vague linkage if the class has no key function. */
19240 return !TARGET_AAPCS_BASED;
19244 /* The EABI says __aeabi_atexit should be used to register static destructors. */
19248 arm_cxx_use_aeabi_atexit (void)
19250 return TARGET_AAPCS_BASED;
19255 arm_set_return_address (rtx source, rtx scratch)
19257 arm_stack_offsets *offsets;
19258 HOST_WIDE_INT delta;
19260 unsigned long saved_regs;
19262 offsets = arm_get_frame_offsets ();
19263 saved_regs = offsets->saved_regs_mask;
19265 if ((saved_regs & (1 << LR_REGNUM)) == 0)
19266 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19269 if (frame_pointer_needed)
19270 addr = plus_constant(hard_frame_pointer_rtx, -4);
19273 /* LR will be the first saved register. */
19274 delta = offsets->outgoing_args - (offsets->frame + 4);
19279 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
19280 GEN_INT (delta & ~4095)));
19285 addr = stack_pointer_rtx;
19287 addr = plus_constant (addr, delta);
19289 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19295 thumb_set_return_address (rtx source, rtx scratch)
19297 arm_stack_offsets *offsets;
19298 HOST_WIDE_INT delta;
19299 HOST_WIDE_INT limit;
19302 unsigned long mask;
19306 offsets = arm_get_frame_offsets ();
19307 mask = offsets->saved_regs_mask;
19308 if (mask & (1 << LR_REGNUM))
19311 /* Find the saved regs. */
19312 if (frame_pointer_needed)
19314 delta = offsets->soft_frame - offsets->saved_args;
19315 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
19321 delta = offsets->outgoing_args - offsets->saved_args;
19324 /* Allow for the stack frame. */
19325 if (TARGET_THUMB1 && TARGET_BACKTRACE)
19327 /* The link register is always the first saved register. */
19330 /* Construct the address. */
19331 addr = gen_rtx_REG (SImode, reg);
19334 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
19335 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
19339 addr = plus_constant (addr, delta);
19341 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19344 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19347 /* Implements target hook vector_mode_supported_p. */
19349 arm_vector_mode_supported_p (enum machine_mode mode)
19351 /* Neon also supports V2SImode, etc. listed in the clause below. */
19352 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
19353 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
19356 if ((mode == V2SImode)
19357 || (mode == V4HImode)
19358 || (mode == V8QImode))
19364 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
19365 ARM insns and therefore guarantee that the shift count is modulo 256.
19366 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
19367 guarantee no particular behavior for out-of-range counts. */
19369 static unsigned HOST_WIDE_INT
19370 arm_shift_truncation_mask (enum machine_mode mode)
19372 return mode == SImode ? 255 : 0;
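/* A sketch of what the mask enables, assuming SImode maps to
   unsigned int. */
static unsigned int
shift_mask_example (unsigned int x, unsigned int n)
{
  /* Because the SImode truncation mask is 255, GCC may delete this
     "& 255" when expanding the shift on ARM; counts of 32..255 then
     rely on the ARM register-shift semantics (result 0). */
  return x << (n & 255);
}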
19376 /* Map internal gcc register numbers to DWARF2 register numbers. */
19379 arm_dbx_register_number (unsigned int regno)
19384 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
19385 compatibility. The EABI defines them as registers 96-103. */
19386 if (IS_FPA_REGNUM (regno))
19387 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
19389 /* FIXME: VFPv3 register numbering. */
19390 if (IS_VFP_REGNUM (regno))
19391 return 64 + regno - FIRST_VFP_REGNUM;
19393 if (IS_IWMMXT_GR_REGNUM (regno))
19394 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
19396 if (IS_IWMMXT_REGNUM (regno))
19397 return 112 + regno - FIRST_IWMMXT_REGNUM;
19399 gcc_unreachable ();
19403 #ifdef TARGET_UNWIND_INFO
19404 /* Emit unwind directives for a store-multiple instruction or stack pointer
19405 push during alignment.
19406 These should only ever be generated by the function prologue code, so
19407 expect them to have a particular form. */
19410 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
19413 HOST_WIDE_INT offset;
19414 HOST_WIDE_INT nregs;
19420 e = XVECEXP (p, 0, 0);
19421 if (GET_CODE (e) != SET)
19424 /* First insn will adjust the stack pointer. */
19425 if (GET_CODE (e) != SET
19426 || GET_CODE (XEXP (e, 0)) != REG
19427 || REGNO (XEXP (e, 0)) != SP_REGNUM
19428 || GET_CODE (XEXP (e, 1)) != PLUS)
19431 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
19432 nregs = XVECLEN (p, 0) - 1;
19434 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
19437 /* The function prologue may also push pc, but not annotate it as it is
19438 never restored. We turn this into a stack pointer adjustment. */
19439 if (nregs * 4 == offset - 4)
19441 fprintf (asm_out_file, "\t.pad #4\n");
19445 fprintf (asm_out_file, "\t.save {");
19447 else if (IS_VFP_REGNUM (reg))
19450 fprintf (asm_out_file, "\t.vsave {");
19452 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
19454 /* FPA registers are done differently. */
19455 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
19459 /* Unknown register type. */
19462 /* If the stack increment doesn't match the size of the saved registers,
19463 something has gone horribly wrong. */
19464 if (offset != nregs * reg_size)
19469 /* The remaining insns will describe the stores. */
19470 for (i = 1; i <= nregs; i++)
19472 /* Expect (set (mem <addr>) (reg)).
19473 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
19474 e = XVECEXP (p, 0, i);
19475 if (GET_CODE (e) != SET
19476 || GET_CODE (XEXP (e, 0)) != MEM
19477 || GET_CODE (XEXP (e, 1)) != REG)
19480 reg = REGNO (XEXP (e, 1));
19485 fprintf (asm_out_file, ", ");
19486 /* We can't use %r for vfp because we need to use the
19487 double precision register names. */
19488 if (IS_VFP_REGNUM (reg))
19489 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
19491 asm_fprintf (asm_out_file, "%r", reg);
19493 #ifdef ENABLE_CHECKING
19494 /* Check that the addresses are consecutive. */
19495 e = XEXP (XEXP (e, 0), 0);
19496 if (GET_CODE (e) == PLUS)
19498 offset += reg_size;
19499 if (GET_CODE (XEXP (e, 0)) != REG
19500 || REGNO (XEXP (e, 0)) != SP_REGNUM
19501 || GET_CODE (XEXP (e, 1)) != CONST_INT
19502 || offset != INTVAL (XEXP (e, 1)))
19506 || GET_CODE (e) != REG
19507 || REGNO (e) != SP_REGNUM)
19511 fprintf (asm_out_file, "}\n");
19514 /* Emit unwind directives for a SET. */
19517 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
19525 switch (GET_CODE (e0))
19528 /* Pushing a single register. */
19529 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
19530 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
19531 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
19534 asm_fprintf (asm_out_file, "\t.save ");
19535 if (IS_VFP_REGNUM (REGNO (e1)))
19536 asm_fprintf(asm_out_file, "{d%d}\n",
19537 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
19539 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
19543 if (REGNO (e0) == SP_REGNUM)
19545 /* A stack increment. */
19546 if (GET_CODE (e1) != PLUS
19547 || GET_CODE (XEXP (e1, 0)) != REG
19548 || REGNO (XEXP (e1, 0)) != SP_REGNUM
19549 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19552 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
19553 -INTVAL (XEXP (e1, 1)));
19555 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
19557 HOST_WIDE_INT offset;
19559 if (GET_CODE (e1) == PLUS)
19561 if (GET_CODE (XEXP (e1, 0)) != REG
19562 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19564 reg = REGNO (XEXP (e1, 0));
19565 offset = INTVAL (XEXP (e1, 1));
19566 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
19567 HARD_FRAME_POINTER_REGNUM, reg,
19568 INTVAL (XEXP (e1, 1)));
19570 else if (GET_CODE (e1) == REG)
19573 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
19574 HARD_FRAME_POINTER_REGNUM, reg);
19579 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
19581 /* Move from sp to reg. */
19582 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
19584 else if (GET_CODE (e1) == PLUS
19585 && GET_CODE (XEXP (e1, 0)) == REG
19586 && REGNO (XEXP (e1, 0)) == SP_REGNUM
19587 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
19589 /* Set reg to offset from sp. */
19590 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
19591 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
19593 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
19595 /* Stack pointer save before alignment. */
19597 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
19610 /* Emit unwind directives for the given insn. */
19613 arm_unwind_emit (FILE * asm_out_file, rtx insn)
19617 if (!ARM_EABI_UNWIND_TABLES)
19620 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19621 && (TREE_NOTHROW (current_function_decl)
19622 || crtl->all_throwers_are_sibcalls))
19625 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
19628 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
19630 pat = XEXP (pat, 0);
19632 pat = PATTERN (insn);
19634 switch (GET_CODE (pat))
19637 arm_unwind_emit_set (asm_out_file, pat);
19641 /* Store multiple. */
19642 arm_unwind_emit_sequence (asm_out_file, pat);
19651 /* Output a reference from a function exception table to the type_info
19652 object X. The EABI specifies that the symbol should be relocated by
19653 an R_ARM_TARGET2 relocation. */
19656 arm_output_ttype (rtx x)
19658 fputs ("\t.word\t", asm_out_file);
19659 output_addr_const (asm_out_file, x);
19660 /* Use special relocations for symbol references. */
19661 if (GET_CODE (x) != CONST_INT)
19662 fputs ("(TARGET2)", asm_out_file);
19663 fputc ('\n', asm_out_file);
19667 #endif /* TARGET_UNWIND_INFO */
19670 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
19671 stack alignment. */
19674 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
19676 rtx unspec = SET_SRC (pattern);
19677 gcc_assert (GET_CODE (unspec) == UNSPEC);
19681 case UNSPEC_STACK_ALIGN:
19682 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
19683 put anything on the stack, so hopefully it won't matter.
19684 CFA = SP will be correct after alignment. */
19685 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
19686 SET_DEST (pattern));
19689 gcc_unreachable ();
19694 /* Output unwind directives for the start/end of a function. */
19697 arm_output_fn_unwind (FILE * f, bool prologue)
19699 if (!ARM_EABI_UNWIND_TABLES)
19703 fputs ("\t.fnstart\n", f);
19706 /* If this function will never be unwound, then mark it as such.
19707 The same condition is used in arm_unwind_emit to suppress
19708 the frame annotations. */
19709 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19710 && (TREE_NOTHROW (current_function_decl)
19711 || crtl->all_throwers_are_sibcalls))
19712 fputs("\t.cantunwind\n", f);
19714 fputs ("\t.fnend\n", f);
19719 arm_emit_tls_decoration (FILE *fp, rtx x)
19721 enum tls_reloc reloc;
19724 val = XVECEXP (x, 0, 0);
19725 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
19727 output_addr_const (fp, val);
19732 fputs ("(tlsgd)", fp);
19735 fputs ("(tlsldm)", fp);
19738 fputs ("(tlsldo)", fp);
19741 fputs ("(gottpoff)", fp);
19744 fputs ("(tpoff)", fp);
19747 gcc_unreachable ();
19755 fputs (" + (. - ", fp);
19756 output_addr_const (fp, XVECEXP (x, 0, 2));
19758 output_addr_const (fp, XVECEXP (x, 0, 3));
19768 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
19771 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
19773 gcc_assert (size == 4);
19774 fputs ("\t.word\t", file);
19775 output_addr_const (file, x);
19776 fputs ("(tlsldo)", file);
19780 arm_output_addr_const_extra (FILE *fp, rtx x)
19782 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
19783 return arm_emit_tls_decoration (fp, x);
19784 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
19787 int labelno = INTVAL (XVECEXP (x, 0, 0));
19789 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
19790 assemble_name_raw (fp, label);
19794 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
19796 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
19800 output_addr_const (fp, XVECEXP (x, 0, 0));
19804 else if (GET_CODE (x) == CONST_VECTOR)
19805 return arm_emit_vector_const (fp, x);

/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
        {
          if (val != -1)
            operands[2] = GEN_INT (val);
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
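
/* For illustration (sketch only): in unified syntax with SET_FLAGS
   == 2, a left shift of r1 by two into r0 can come out as the
   flag-setting form

	lsls	r0, r1, #2

   (eligible for a 16-bit Thumb-2 encoding), whereas divided-syntax
   ARM code uses the shifter-operand form "mov r0, r1, lsl #2" via the
   %S3 modifier.  */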

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";

    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";

    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }

    default:
      gcc_unreachable ();
    }
}
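
/* For illustration (sketch only; the bound and label are
   hypothetical): for a byte-offset dispatch table (QImode
   ADDR_DIFF_VEC) the sequence produced above is

	cmp	r0, #9		@ index against table bound
	bhi	.Ldefault	@ out of range: take default label
	tbb	[pc, r0]	@ branch via byte offset table

   with tbh used for halfword tables and an explicit adr/ldr sequence
   once word-sized offsets are needed (or, with -fpic, an adr/ldr/add
   sequence so the table holds position-independent differences).  */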

/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}

/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
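
/* For illustration: the aapcs_name strings are already encoded as
   Itanium C++ ABI <source-name>s, i.e. a decimal length prefix
   followed by that many characters, so "15__simd64_int8_t" denotes
   the 15-character identifier "__simd64_int8_t".  */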

/* Implement TARGET_MANGLE_TYPE.  */
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
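
/* For illustration (sketch only): given the table above, a C++
   function

	void f (int8x8_t);	// int8x8_t: mode V8QImode, element
				// type __builtin_neon_qi

   mangles as _Z1f15__simd64_int8_t rather than with the generic
   vector mangling, matching AAPCS Appendix A.  */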

/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */
void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;

  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}

/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
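
/* For illustration (sketch only): with this hook in place, compiling
   at -O1 behaves as if -fsection-anchors had been passed, while the
   sentinel value 2 lets later option handling distinguish this
   default from an explicit -fsection-anchors on the command line.  */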

#include "gt-arm.h"