/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "cgraph.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
#include "intl.h"
#include "libfuncs.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

EXPORTED_CONST struct attribute_spec arm_attribute_table[];

void (*arm_lang_output_object_attributes_hook)(void);

/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
#undef  TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef  TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
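
/* (Worked check: 4088 + 1 + 4095 = 8184 = 1023 * 8, so the range
   [-4088, 4095] spans a whole number of doublewords, as claimed above.)  */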

#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
481 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
482 #define FL_ARCH3M (1 << 1) /* Extended multiply */
483 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
484 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
485 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
486 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
487 #define FL_THUMB (1 << 6) /* Thumb aware */
488 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
489 #define FL_STRONG (1 << 8) /* StrongARM */
490 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
491 #define FL_XSCALE (1 << 10) /* XScale */
492 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
493 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
494 media instructions. */
495 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
496 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
497 Note: ARM6 & 7 derivatives only. */
498 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
499 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
500 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
502 #define FL_DIV (1 << 18) /* Hardware divide. */
503 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
504 #define FL_NEON (1 << 20) /* Neon instructions. */
506 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */

#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
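
/* A worked example of the chains above: FL_FOR_ARCH7M expands to
   (FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_DIV, i.e. the v6T2 bits (which
   include FL_THUMB2) with the non-M-profile instructions masked out,
   plus hardware divide; a v7-M target is therefore Thumb-2 only.  */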

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
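
/* The table is laid out so that a code's inverse sits at index ^ 1;
   for example "eq"/"ne" and "gt"/"le" are adjacent pairs, which is what
   the ARM_INVERSE_CONDITION_CODE macro in arm.h relies on.  */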

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2", iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string       name            processors  */
  { NULL,       "-mcpu=",       all_cores  },
  { NULL,       "-march=",      all_architectures },
  { NULL,       "-mtune=",      all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};

/* Available values for -mfpu=.  */

static const struct fpu_desc all_fpus[] =
{
  {"fpa",        FPUTYPE_FPA},
  {"fpe2",       FPUTYPE_FPA_EMU2},
  {"fpe3",       FPUTYPE_FPA_EMU3},
  {"maverick",   FPUTYPE_MAVERICK},
  {"vfp",        FPUTYPE_VFP},
  {"vfp3",       FPUTYPE_VFP3},
  {"vfpv3",      FPUTYPE_VFP3},
  {"vfpv3-d16",  FPUTYPE_VFP3D16},
  {"neon",       FPUTYPE_NEON},
  {"neon-fp16",  FPUTYPE_NEON_FP16}
};

/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum arm_fp_model fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,         /* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,        /* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3D16  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_NEON  */
  ARM_FP_MODEL_VFP              /* FPUTYPE_NEON_FP16  */
};

struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",    ARM_FLOAT_ABI_SOFT},
  {"softfp",  ARM_FLOAT_ABI_SOFTFP},
  {"hard",    ARM_FLOAT_ABI_HARD}
};

struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",         ARM_FP16_FORMAT_NONE},
  {"ieee",         ARM_FP16_FORMAT_IEEE},
  {"alternative",  ARM_FP16_FORMAT_ALTERNATIVE}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
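
/* For example, emit_set_insn (reg, GEN_INT (0)) adds the insn
   (set (reg) (const_int 0)) to the instruction stream and returns it.  */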

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
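
/* Example: bit_count (0x29) iterates 0x29 -> 0x28 -> 0x20 -> 0 and
   returns 3; the loop runs once per set bit rather than once per bit
   position in the word.  */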

/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     default (AAPCS) ABI.  */
  if (arm_abi != ARM_ABI_AAPCS)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     callee-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
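
  /* Thus an SImode "a % b" expands to a call to __aeabi_idivmod, which
     the Run-Time ABI defines to return the quotient in r0 and the
     remainder in r1; the compiler simply picks the remainder out of r1.
     A plain "a / b" instead uses the dedicated __aeabi_idiv entry point
     set up above.  */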

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
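
/* The net effect is that on an AAPCS target a user-level declaration
   "va_list ap;" creates an object laid out exactly like

       struct __va_list { void *__ap; };

   with both the tag and the field name fixed by the ABI as described
   above.  */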

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}
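
/* For example, "-mcpu=arm926ej-s -mtune=cortex-a8" leaves "arm926ej-s" in
   arm_select[0].string and "cortex-a8" in arm_select[2].string;
   arm_override_options below then resolves both names against the
   all_cores table.  */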

static void
arm_target_help (void)
{
  unsigned int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);

	  if (value > 0)
	    columns = value;
	}

      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  do
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }
  while (--i > 0);

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  do
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }
  while (--i > 0);
  printf ("\n");
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
	{
	  const struct processors * sel;

	  for (sel = ptr->processors; sel->name != NULL; sel++)
	    if (streq (ptr->string, sel->name))
	      {
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i == ARM_OPT_SET_CPU)
		  selected_cpu = (enum processor_type) (sel - ptr->processors);

		if (i != ARM_OPT_SET_TUNE)
		  {
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
			       ptr->string);

		    insn_flags = sel->flags;
		  }

		break;
	      }

	  if (sel->name == NULL)
	    error ("bad value (%s) for %s switch", ptr->string, ptr->name);
	}
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int        sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
	  /* Default to ARM6.  */
	  if (selected_cpu == arm_none)
	    selected_cpu = arm6;
	}
      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  insn_flags = sel->flags;
	}

      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
	arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     selected.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;

  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
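
  /* (Worked check: 248 + 1 + 4095 = 4344 = 543 * 8, matching the comment
     above.)  */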

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }

  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
	{
	  if (streq (all_fpus[i].name, target_fpu_name))
	    {
	      arm_fpu_arch = all_fpus[i].fpu;
	      arm_fpu_tune = arm_fpu_arch;
	      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
	      break;
	    }
	}
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
	error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
	arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
	arm_fpu_arch = FPUTYPE_MAVERICK;
      else
	arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
	arm_fpu_tune = FPUTYPE_FPA;
      else
	arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  if (TARGET_AAPCS_BASED
      && (arm_fp_model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support -mcaller-super-interworking");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
	target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
	target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
	target_thread_pointer = TP_CP15;
      else
	error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
	arm_structure_size_boundary = size;
      else
	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (selected_cpu == cortexm3)
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* ??? We might want scheduling for thumb2.  */
  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }
1746 arm_constant_limit = 1;
1748 /* If optimizing for size, bump the number of instructions that we
1749 are prepared to conditionally execute (even on a StrongARM). */
1750 max_insns_skipped = 6;
1754 /* For processors with load scheduling, it never costs more than
1755 2 cycles to load a constant, and the load scheduler may well
1756 reduce that to 1. */
1758 arm_constant_limit = 1;
1760 /* On XScale the longer latency of a load makes it more difficult
1761 to achieve a good schedule, so it's faster to synthesize
1762 constants that can be built in two insns. */
1763 if (arm_tune_xscale)
1764 arm_constant_limit = 2;
1766 /* StrongARM has early execution of branches, so a sequence
1767 that is worth skipping is shorter. */
1768 if (arm_tune_strongarm)
1769 max_insns_skipped = 3;
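/* Illustrative example of what max_insns_skipped controls: a branch
   around a short arm of an if/else can instead be predicated, e.g.

	cmp	r0, #0
	movne	r1, #1
	moveq	r1, #2

   which only pays off while the predicated run stays short.  */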
1772 /* Register global variables with the garbage collector. */
1773 arm_add_gc_roots ();
1777 arm_add_gc_roots (void)
1779 gcc_obstack_init(&minipool_obstack);
1780 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1783 /* A table of known ARM exception types.
1784 For use with the interrupt function attribute. */
1788 const char *const arg;
1789 const unsigned long return_value;
1793 static const isr_attribute_arg isr_attribute_args [] =
1795 { "IRQ", ARM_FT_ISR },
1796 { "irq", ARM_FT_ISR },
1797 { "FIQ", ARM_FT_FIQ },
1798 { "fiq", ARM_FT_FIQ },
1799 { "ABORT", ARM_FT_ISR },
1800 { "abort", ARM_FT_ISR },
1801 { "ABORT", ARM_FT_ISR },
1802 { "abort", ARM_FT_ISR },
1803 { "UNDEF", ARM_FT_EXCEPTION },
1804 { "undef", ARM_FT_EXCEPTION },
1805 { "SWI", ARM_FT_EXCEPTION },
1806 { "swi", ARM_FT_EXCEPTION },
1807 { NULL, ARM_FT_NORMAL }
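/* Example use of the attribute arguments above (illustrative):

	void handler (void) __attribute__ ((interrupt ("IRQ")));

   marks HANDLER as an IRQ service routine (ARM_FT_ISR).  */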
1810 /* Returns the interrupt function type corresponding to the given
1811 attribute argument, or ARM_FT_UNKNOWN if it cannot be determined. */
1813 static unsigned long
1814 arm_isr_value (tree argument)
1816 const isr_attribute_arg * ptr;
1820 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1822 /* No argument - default to IRQ. */
1823 if (argument == NULL_TREE)
1826 /* Get the value of the argument. */
1827 if (TREE_VALUE (argument) == NULL_TREE
1828 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1829 return ARM_FT_UNKNOWN;
1831 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1833 /* Check it against the list of known arguments. */
1834 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1835 if (streq (arg, ptr->arg))
1836 return ptr->return_value;
1838 /* An unrecognized interrupt type. */
1839 return ARM_FT_UNKNOWN;
1842 /* Computes the type of the current function. */
1844 static unsigned long
1845 arm_compute_func_type (void)
1847 unsigned long type = ARM_FT_UNKNOWN;
1851 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1853 /* Decide if the current function is volatile. Such functions
1854 never return, and many memory cycles can be saved by not storing
1855 register values that will never be needed again. This optimization
1856 was added to speed up context switching in a kernel application. */
1858 && (TREE_NOTHROW (current_function_decl)
1859 || !(flag_unwind_tables
1860 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1861 && TREE_THIS_VOLATILE (current_function_decl))
1862 type |= ARM_FT_VOLATILE;
1864 if (cfun->static_chain_decl != NULL)
1865 type |= ARM_FT_NESTED;
1867 attr = DECL_ATTRIBUTES (current_function_decl);
1869 a = lookup_attribute ("naked", attr);
1871 type |= ARM_FT_NAKED;
1873 a = lookup_attribute ("isr", attr);
1875 a = lookup_attribute ("interrupt", attr);
1878 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1880 type |= arm_isr_value (TREE_VALUE (a));
1885 /* Returns the type of the current function. */
1888 arm_current_func_type (void)
1890 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1891 cfun->machine->func_type = arm_compute_func_type ();
1893 return cfun->machine->func_type;
1897 arm_allocate_stack_slots_for_args (void)
1899 /* Naked functions should not allocate stack slots for arguments. */
1900 return !IS_NAKED (arm_current_func_type ());
1904 /* Return 1 if it is possible to return using a single instruction.
1905 If SIBLING is non-null, this is a test for a return before a sibling
1906 call. SIBLING is the call insn, so we can examine its register usage. */
1909 use_return_insn (int iscond, rtx sibling)
1912 unsigned int func_type;
1913 unsigned long saved_int_regs;
1914 unsigned HOST_WIDE_INT stack_adjust;
1915 arm_stack_offsets *offsets;
1917 /* Never use a return instruction before reload has run. */
1918 if (!reload_completed)
1921 func_type = arm_current_func_type ();
1923 /* Naked, volatile and stack alignment functions need special consideration. */
1925 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1928 /* So do interrupt functions that use the frame pointer and Thumb
1929 interrupt functions. */
1930 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1933 offsets = arm_get_frame_offsets ();
1934 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1936 /* As do variadic functions. */
1937 if (crtl->args.pretend_args_size
1938 || cfun->machine->uses_anonymous_args
1939 /* Or if the function calls __builtin_eh_return () */
1940 || crtl->calls_eh_return
1941 /* Or if the function calls alloca */
1942 || cfun->calls_alloca
1943 /* Or if there is a stack adjustment. However, if the stack pointer
1944 is saved on the stack, we can use a pre-incrementing stack load. */
1945 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1946 && stack_adjust == 4)))
1949 saved_int_regs = offsets->saved_regs_mask;
1951 /* Unfortunately, the insn
1953 ldmib sp, {..., sp, ...}
1955 triggers a bug on most SA-110 based devices, such that the stack
1956 pointer won't be correctly restored if the instruction takes a
1957 page fault. We work around this problem by popping r3 along with
1958 the other registers, since that is never slower than executing
1959 another instruction.
1961 We test for !arm_arch5 here, because code for any architecture
1962 less than this could potentially be run on one of the buggy chips. */
1964 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1966 /* Validate that r3 is a call-clobbered register (always true in
1967 the default ABI) ...
1968 if (!call_used_regs[3])
1971 /* ... that it isn't being used for a return value ... */
1972 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1975 /* ... or for a tail-call argument ... */
1978 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1980 if (find_regno_fusage (sibling, USE, 3))
1984 /* ... and that there are no call-saved registers in r0-r2
1985 (always true in the default ABI). */
1986 if (saved_int_regs & 0x7)
1990 /* Can't be done if interworking with Thumb, and any registers have been stacked. */
1992 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1995 /* On StrongARM, conditional returns are expensive if they aren't
1996 taken and multiple registers have been stacked. */
1997 if (iscond && arm_tune_strongarm)
1999 /* Conditional return when just the LR is stored is a simple
2000 conditional-load instruction; that's not expensive. */
2001 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2005 && arm_pic_register != INVALID_REGNUM
2006 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2010 /* If there are saved registers but the LR isn't saved, then we need
2011 two instructions for the return. */
2012 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2015 /* Can't be done if any of the FPA regs are pushed,
2016 since this also requires an insn. */
2017 if (TARGET_HARD_FLOAT && TARGET_FPA)
2018 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2019 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2022 /* Likewise VFP regs. */
2023 if (TARGET_HARD_FLOAT && TARGET_VFP)
2024 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2025 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2028 if (TARGET_REALLY_IWMMXT)
2029 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2030 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2036 /* Return TRUE if int I is a valid immediate ARM constant. */
2039 const_ok_for_arm (HOST_WIDE_INT i)
2043 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2044 be all zero, or all one. */
2045 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2046 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2047 != ((~(unsigned HOST_WIDE_INT) 0)
2048 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2051 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2053 /* Fast return for 0 and small values. We must do this for zero, since
2054 the code below can't handle that one case. */
2055 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2058 /* Get the number of trailing zeros. */
2059 lowbit = ffs((int) i) - 1;
2061 /* Only even shifts are allowed in ARM mode, so round down to the
2062 nearest even number. */
2066 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2071 /* Allow rotated constants in ARM mode. */
2073 && ((i & ~0xc000003f) == 0
2074 || (i & ~0xf000000f) == 0
2075 || (i & ~0xfc000003) == 0))
2082 /* Allow repeated pattern. */
2085 if (i == v || i == (v | (v << 8)))
2092 /* Return true if I is a valid constant for the operation CODE. */
2094 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2096 if (const_ok_for_arm (i))
2120 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2122 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2128 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2132 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2139 /* Emit a sequence of insns to handle a large constant.
2140 CODE is the code of the operation required, it can be any of SET, PLUS,
2141 IOR, AND, XOR, MINUS;
2142 MODE is the mode in which the operation is being performed;
2143 VAL is the integer to operate on;
2144 SOURCE is the other operand (a register, or a null-pointer for SET);
2145 SUBTARGETS means it is safe to create scratch registers if that will
2146 either produce a simpler sequence or allow the values to be CSEd;
2147 Return value is the number of insns emitted. */
2149 /* ??? Tweak this for thumb2. */
2151 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2152 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2156 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2157 cond = COND_EXEC_TEST (PATTERN (insn));
2161 if (subtargets || code == SET
2162 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2163 && REGNO (target) != REGNO (source)))
2165 /* After arm_reorg has been called, we can't fix up expensive
2166 constants by pushing them into memory so we must synthesize
2167 them in-line, regardless of the cost. This is only likely to
2168 be more costly on chips that have load delay slots and we are
2169 compiling without running the scheduler (so no splitting
2170 occurred before the final instruction emission).
2172 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c */
2174 if (!after_arm_reorg
2176 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2178 > arm_constant_limit + (code != SET)))
2182 /* Currently SET is the only monadic value for CODE, all
2183 the rest are dyadic. */
2184 if (TARGET_USE_MOVT)
2185 arm_emit_movpair (target, GEN_INT (val));
2187 emit_set_insn (target, GEN_INT (val));
2193 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2195 if (TARGET_USE_MOVT)
2196 arm_emit_movpair (temp, GEN_INT (val));
2198 emit_set_insn (temp, GEN_INT (val));
2200 /* For MINUS, the constant is the value being subtracted from
2201 (TARGET = VAL - SOURCE), since we never have subtraction of a constant. */
2203 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2205 emit_set_insn (target,
2206 gen_rtx_fmt_ee (code, mode, source, temp));
2212 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2216 /* Return the number of ARM instructions required to synthesize the given constant. */
2219 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2221 HOST_WIDE_INT temp1;
2229 if (remainder & (3 << (i - 2)))
2234 temp1 = remainder & ((0x0ff << end)
2235 | ((i < end) ? (0xff >> (32 - end)) : 0));
2236 remainder &= ~temp1;
2241 } while (remainder);
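/* Worked example (illustrative): 0x12345678 has no single rotated
   8-bit form, so it is built from four byte-sized chunks:

	mov	r0, #0x12000000
	orr	r0, r0, #0x00340000
	orr	r0, r0, #0x00005600
	orr	r0, r0, #0x00000078

   which is the chunking the loop above counts.  */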
2245 /* Emit an instruction with the indicated PATTERN. If COND is
2246 non-NULL, conditionalize the execution of the instruction on COND being true. */
2250 emit_constant_insn (rtx cond, rtx pattern)
2253 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2254 emit_insn (pattern);
2257 /* As above, but extra parameter GENERATE which, if clear, suppresses RTL generation. */
2259 /* ??? This needs more work for thumb2. */
2262 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2263 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2268 int can_negate_initial = 0;
2271 int num_bits_set = 0;
2272 int set_sign_bit_copies = 0;
2273 int clear_sign_bit_copies = 0;
2274 int clear_zero_bit_copies = 0;
2275 int set_zero_bit_copies = 0;
2277 unsigned HOST_WIDE_INT temp1, temp2;
2278 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2280 /* Find out which operations are safe for a given CODE. Also do a quick
2281 check for degenerate cases; these can occur when DImode operations are split. */
2293 can_negate_initial = 1;
2297 if (remainder == 0xffffffff)
2300 emit_constant_insn (cond,
2301 gen_rtx_SET (VOIDmode, target,
2302 GEN_INT (ARM_SIGN_EXTEND (val))));
2308 if (reload_completed && rtx_equal_p (target, source))
2312 emit_constant_insn (cond,
2313 gen_rtx_SET (VOIDmode, target, source));
2325 emit_constant_insn (cond,
2326 gen_rtx_SET (VOIDmode, target, const0_rtx));
2329 if (remainder == 0xffffffff)
2331 if (reload_completed && rtx_equal_p (target, source))
2334 emit_constant_insn (cond,
2335 gen_rtx_SET (VOIDmode, target, source));
2344 if (reload_completed && rtx_equal_p (target, source))
2347 emit_constant_insn (cond,
2348 gen_rtx_SET (VOIDmode, target, source));
2352 /* We don't know how to handle other cases yet. */
2353 gcc_assert (remainder == 0xffffffff);
2356 emit_constant_insn (cond,
2357 gen_rtx_SET (VOIDmode, target,
2358 gen_rtx_NOT (mode, source)));
2362 /* We treat MINUS as (val - source), since (source - val) is always
2363 passed as (source + (-val)). */
2367 emit_constant_insn (cond,
2368 gen_rtx_SET (VOIDmode, target,
2369 gen_rtx_NEG (mode, source)));
2372 if (const_ok_for_arm (val))
2375 emit_constant_insn (cond,
2376 gen_rtx_SET (VOIDmode, target,
2377 gen_rtx_MINUS (mode, GEN_INT (val),
2389 /* If we can do it in one insn, get out quickly. */
2390 if (const_ok_for_arm (val)
2391 || (can_negate_initial && const_ok_for_arm (-val))
2392 || (can_invert && const_ok_for_arm (~val)))
2395 emit_constant_insn (cond,
2396 gen_rtx_SET (VOIDmode, target,
2398 ? gen_rtx_fmt_ee (code, mode, source,
2404 /* Calculate a few attributes that may be useful for specific optimizations. */
2406 /* Count number of leading zeros. */
2407 for (i = 31; i >= 0; i--)
2409 if ((remainder & (1 << i)) == 0)
2410 clear_sign_bit_copies++;
2415 /* Count number of leading 1's. */
2416 for (i = 31; i >= 0; i--)
2418 if ((remainder & (1 << i)) != 0)
2419 set_sign_bit_copies++;
2424 /* Count number of trailing zeros. */
2425 for (i = 0; i <= 31; i++)
2427 if ((remainder & (1 << i)) == 0)
2428 clear_zero_bit_copies++;
2433 /* Count number of trailing 1's. */
2434 for (i = 0; i <= 31; i++)
2436 if ((remainder & (1 << i)) != 0)
2437 set_zero_bit_copies++;
2445 /* See if we can use movw. */
2446 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2449 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2454 /* See if we can do this by sign_extending a constant that is known
2455 to be negative. This is a good way of doing it, since the shift
2456 may well merge into a subsequent insn. */
2457 if (set_sign_bit_copies > 1)
2459 if (const_ok_for_arm
2460 (temp1 = ARM_SIGN_EXTEND (remainder
2461 << (set_sign_bit_copies - 1))))
2465 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2466 emit_constant_insn (cond,
2467 gen_rtx_SET (VOIDmode, new_src,
2469 emit_constant_insn (cond,
2470 gen_ashrsi3 (target, new_src,
2471 GEN_INT (set_sign_bit_copies - 1)));
2475 /* For an inverted constant, we will need to set the low bits;
2476 these will be shifted out of harm's way. */
2477 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2478 if (const_ok_for_arm (~temp1))
2482 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2483 emit_constant_insn (cond,
2484 gen_rtx_SET (VOIDmode, new_src,
2486 emit_constant_insn (cond,
2487 gen_ashrsi3 (target, new_src,
2488 GEN_INT (set_sign_bit_copies - 1)));
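/* Worked example for the shift trick above (illustrative): to load
   0xfffff000 (twenty leading ones) we can emit

	mov	r0, #0x80000000
	mov	r0, r0, asr #19

   since 0x80000000 is a valid immediate and the arithmetic shift
   replicates the sign bit.  */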
2494 /* See if we can calculate the value as the difference between two
2495 valid immediates. */
2496 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2498 int topshift = clear_sign_bit_copies & ~1;
2500 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2501 & (0xff000000 >> topshift));
2503 /* If temp1 is zero, then that means the 9 most significant
2504 bits of remainder were 1 and we've caused it to overflow.
2505 When topshift is 0 we don't need to do anything since we
2506 can borrow from 'bit 32'. */
2507 if (temp1 == 0 && topshift != 0)
2508 temp1 = 0x80000000 >> (topshift - 1);
2510 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2512 if (const_ok_for_arm (temp2))
2516 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2517 emit_constant_insn (cond,
2518 gen_rtx_SET (VOIDmode, new_src,
2520 emit_constant_insn (cond,
2521 gen_addsi3 (target, new_src,
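/* Worked example (illustrative): 0xfff is not a valid immediate but is
   the difference of two that are, so we get

	mov	r0, #0x1000
	sub	r0, r0, #1

   with temp1 == 0x1000 and temp2 == 1.  */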
2529 /* See if we can generate this by setting the bottom (or the top)
2530 16 bits, and then shifting these into the other half of the
2531 word. We only look for the simplest cases; to do more would cost
2532 too much. Be careful, however, not to generate this when the
2533 alternative would take fewer insns. */
2534 if (val & 0xffff0000)
2536 temp1 = remainder & 0xffff0000;
2537 temp2 = remainder & 0x0000ffff;
2539 /* Overlaps outside this range are best done using other methods. */
2540 for (i = 9; i < 24; i++)
2542 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2543 && !const_ok_for_arm (temp2))
2545 rtx new_src = (subtargets
2546 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2548 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2549 source, subtargets, generate);
2557 gen_rtx_ASHIFT (mode, source,
2564 /* Don't duplicate cases already considered. */
2565 for (i = 17; i < 24; i++)
2567 if (((temp1 | (temp1 >> i)) == remainder)
2568 && !const_ok_for_arm (temp1))
2570 rtx new_src = (subtargets
2571 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2573 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2574 source, subtargets, generate);
2579 gen_rtx_SET (VOIDmode, target,
2582 gen_rtx_LSHIFTRT (mode, source,
2593 /* If we have IOR or XOR, and the constant can be loaded in a
2594 single instruction, and we can find a temporary to put it in,
2595 then this can be done in two instructions instead of 3-4. */
2597 /* TARGET can't be NULL if SUBTARGETS is 0 */
2598 || (reload_completed && !reg_mentioned_p (target, source)))
2600 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2604 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2606 emit_constant_insn (cond,
2607 gen_rtx_SET (VOIDmode, sub,
2609 emit_constant_insn (cond,
2610 gen_rtx_SET (VOIDmode, target,
2611 gen_rtx_fmt_ee (code, mode,
2622 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2623 and the remainder 0s, e.g. 0xfff00000)
2624 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2626 This can be done in 2 instructions by using shifts with mov or mvn.
2631 e.g. x = x | 0xfff00000 generates: mvn r0, r0, asl #12; mvn r0, r0, lsr #12 */
2632 if (set_sign_bit_copies > 8
2633 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2637 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2638 rtx shift = GEN_INT (set_sign_bit_copies);
2642 gen_rtx_SET (VOIDmode, sub,
2644 gen_rtx_ASHIFT (mode,
2649 gen_rtx_SET (VOIDmode, target,
2651 gen_rtx_LSHIFTRT (mode, sub,
2658 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2660 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2662 e.g. r0 = r0 | 0xfff generates: mvn r0, r0, lsr #12; mvn r0, r0, asl #12 */
2667 if (set_zero_bit_copies > 8
2668 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2672 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2673 rtx shift = GEN_INT (set_zero_bit_copies);
2677 gen_rtx_SET (VOIDmode, sub,
2679 gen_rtx_LSHIFTRT (mode,
2684 gen_rtx_SET (VOIDmode, target,
2686 gen_rtx_ASHIFT (mode, sub,
2692 /* This will never be reached for Thumb2 because orn is a valid
2693 instruction. This is for Thumb1 and the ARM 32 bit cases.
2695 x = y | constant (such that ~constant is a valid constant)
2697 x = ~(~y & ~constant).
2699 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2703 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2704 emit_constant_insn (cond,
2705 gen_rtx_SET (VOIDmode, sub,
2706 gen_rtx_NOT (mode, source)));
2709 sub = gen_reg_rtx (mode);
2710 emit_constant_insn (cond,
2711 gen_rtx_SET (VOIDmode, sub,
2712 gen_rtx_AND (mode, source,
2714 emit_constant_insn (cond,
2715 gen_rtx_SET (VOIDmode, target,
2716 gen_rtx_NOT (mode, sub)));
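/* Worked example (illustrative): for x |= 0xffffff00 the inverted
   constant 0xff is valid, so the transformation yields

	mvn	r1, r0
	and	r1, r1, #0xff
	mvn	r0, r1

   with no constant-pool load.  */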
2723 /* See if two shifts will do two or more insns' worth of work. */
2724 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2726 HOST_WIDE_INT shift_mask = ((0xffffffff
2727 << (32 - clear_sign_bit_copies))
2730 if ((remainder | shift_mask) != 0xffffffff)
2734 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2735 insns = arm_gen_constant (AND, mode, cond,
2736 remainder | shift_mask,
2737 new_src, source, subtargets, 1);
2742 rtx targ = subtargets ? NULL_RTX : target;
2743 insns = arm_gen_constant (AND, mode, cond,
2744 remainder | shift_mask,
2745 targ, source, subtargets, 0);
2751 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2752 rtx shift = GEN_INT (clear_sign_bit_copies);
2754 emit_insn (gen_ashlsi3 (new_src, source, shift));
2755 emit_insn (gen_lshrsi3 (target, new_src, shift));
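/* Illustrative example: x &= 0x0000ffff (16 leading zeros in the mask)
   becomes

	mov	r0, r0, asl #16
	mov	r0, r0, lsr #16
   */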
2761 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2763 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2765 if ((remainder | shift_mask) != 0xffffffff)
2769 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2771 insns = arm_gen_constant (AND, mode, cond,
2772 remainder | shift_mask,
2773 new_src, source, subtargets, 1);
2778 rtx targ = subtargets ? NULL_RTX : target;
2780 insns = arm_gen_constant (AND, mode, cond,
2781 remainder | shift_mask,
2782 targ, source, subtargets, 0);
2788 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2789 rtx shift = GEN_INT (clear_zero_bit_copies);
2791 emit_insn (gen_lshrsi3 (new_src, source, shift));
2792 emit_insn (gen_ashlsi3 (target, new_src, shift));
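/* Likewise for trailing zeros (illustrative): x &= 0xffff0000 becomes

	mov	r0, r0, lsr #16
	mov	r0, r0, asl #16
   */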
2804 for (i = 0; i < 32; i++)
2805 if (remainder & (1 << i))
2809 || (code != IOR && can_invert && num_bits_set > 16))
2810 remainder = (~remainder) & 0xffffffff;
2811 else if (code == PLUS && num_bits_set > 16)
2812 remainder = (-remainder) & 0xffffffff;
2819 /* Now try to find a way of doing the job in either two or three instructions.
2821 We start by looking for the largest block of zeros that are aligned on
2822 a 2-bit boundary, we then fill up the temps, wrapping around to the
2823 top of the word when we drop off the bottom.
2824 In the worst case this code should produce no more than four insns.
2825 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2826 best place to start. */
2828 /* ??? Use thumb2 replicated constants when the high and low halfwords are the same. */
2834 int best_consecutive_zeros = 0;
2836 for (i = 0; i < 32; i += 2)
2838 int consecutive_zeros = 0;
2840 if (!(remainder & (3 << i)))
2842 while ((i < 32) && !(remainder & (3 << i)))
2844 consecutive_zeros += 2;
2847 if (consecutive_zeros > best_consecutive_zeros)
2849 best_consecutive_zeros = consecutive_zeros;
2850 best_start = i - consecutive_zeros;
2856 /* So long as it won't require any more insns to do so, it's
2857 desirable to emit a small constant (in bits 0...9) in the last
2858 insn. This way there is more chance that it can be combined with
2859 a later addressing insn to form a pre-indexed load or store
2860 operation. Consider:
2862 *((volatile int *)0xe0000100) = 1;
2863 *((volatile int *)0xe0000110) = 2;
2865 We want this to wind up as:
mov rA, #0xe0000000
mov rB, #1
str rB, [rA, #0x100]
mov rB, #2
str rB, [rA, #0x110]
2873 rather than having to synthesize both large constants from scratch.
2875 Therefore, we calculate how many insns would be required to emit
2876 the constant starting from `best_start', and also starting from
2877 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2878 yield a shorter sequence, we may as well use zero. */
2880 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2881 && (count_insns_for_constant (remainder, 0) <=
2882 count_insns_for_constant (remainder, best_start)))
2886 /* Now start emitting the insns. */
2894 if (remainder & (3 << (i - 2)))
2899 temp1 = remainder & ((0x0ff << end)
2900 | ((i < end) ? (0xff >> (32 - end)) : 0));
2901 remainder &= ~temp1;
2905 rtx new_src, temp1_rtx;
2907 if (code == SET || code == MINUS)
2909 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2910 if (can_invert && code != MINUS)
2915 if (remainder && subtargets)
2916 new_src = gen_reg_rtx (mode);
2921 else if (can_negate)
2925 temp1 = trunc_int_for_mode (temp1, mode);
2926 temp1_rtx = GEN_INT (temp1);
2930 else if (code == MINUS)
2931 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2933 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2935 emit_constant_insn (cond,
2936 gen_rtx_SET (VOIDmode, new_src,
2946 else if (code == MINUS)
2955 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary shifts. */
2968 /* Canonicalize a comparison so that we are more likely to recognize it.
2969 This can be done for a few constant compares, where we can make the
2970 immediate value easier to load. */
2973 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2976 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2977 unsigned HOST_WIDE_INT maxval;
2978 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2989 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2991 *op1 = GEN_INT (i + 1);
2992 return code == GT ? GE : LT;
2999 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3001 *op1 = GEN_INT (i - 1);
3002 return code == GE ? GT : LE;
3008 if (i != ~((unsigned HOST_WIDE_INT) 0)
3009 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3011 *op1 = GEN_INT (i + 1);
3012 return code == GTU ? GEU : LTU;
3019 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3021 *op1 = GEN_INT (i - 1);
3022 return code == GEU ? GTU : LEU;
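/* Worked example (illustrative): "x > 4095" would need the invalid
   immediate 4095, but the rewrite to "x >= 4096" above needs only

	cmp	r0, #4096

   since 0x1000 is a single rotated 8-bit constant.  */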
3034 /* Define how to find the value returned by a function. */
3037 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
3039 enum machine_mode mode;
3040 int unsignedp ATTRIBUTE_UNUSED;
3041 rtx r ATTRIBUTE_UNUSED;
3043 mode = TYPE_MODE (type);
3044 /* Promote integer types. */
3045 if (INTEGRAL_TYPE_P (type))
3046 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
3048 /* Promote small structs returned in a register to full-word size
3049 for big-endian AAPCS. */
3050 if (arm_return_in_msb (type))
3052 HOST_WIDE_INT size = int_size_in_bytes (type);
3053 if (size % UNITS_PER_WORD != 0)
3055 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3056 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3060 return LIBCALL_VALUE(mode);
3063 /* Determine the amount of memory needed to store the possible return
3064 registers of an untyped call. */
3066 arm_apply_result_size (void)
3072 if (TARGET_HARD_FLOAT_ABI)
3076 if (TARGET_MAVERICK)
3079 if (TARGET_IWMMXT_ABI)
3086 /* Decide whether a type should be returned in memory (true)
3087 or in a register (false). This is called as the target hook
3088 TARGET_RETURN_IN_MEMORY. */
3090 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3094 size = int_size_in_bytes (type);
3096 /* Vector values should be returned using ARM registers, not memory (unless
3097 they're over 16 bytes, which will break since we only have four
3098 call-clobbered registers to play with). */
3099 if (TREE_CODE (type) == VECTOR_TYPE)
3100 return (size < 0 || size > (4 * UNITS_PER_WORD));
3102 if (!AGGREGATE_TYPE_P (type) &&
3103 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
3104 /* All simple types are returned in registers.
3105 For AAPCS, complex types are treated the same as aggregates. */
3108 if (arm_abi != ARM_ABI_APCS)
3110 /* ATPCS and later return aggregate types in memory only if they are
3111 larger than a word (or are variable size). */
3112 return (size < 0 || size > UNITS_PER_WORD);
3115 /* For the arm-wince targets we choose to be compatible with Microsoft's
3116 ARM and Thumb compilers, which always return aggregates in memory. */
3118 /* All structures/unions bigger than one word are returned in memory.
3119 Also catch the case where int_size_in_bytes returns -1. In this case
3120 the aggregate is either huge or of variable size, and in either case
3121 we will want to return it via memory and not in a register. */
3122 if (size < 0 || size > UNITS_PER_WORD)
3125 if (TREE_CODE (type) == RECORD_TYPE)
3129 /* For a struct the APCS says that we only return in a register
3130 if the type is 'integer like' and every addressable element
3131 has an offset of zero. For practical purposes this means
3132 that the structure can have at most one non bit-field element
3133 and that this element must be the first one in the structure. */
3135 /* Find the first field, ignoring non FIELD_DECL things which will
3136 have been created by C++. */
3137 for (field = TYPE_FIELDS (type);
3138 field && TREE_CODE (field) != FIELD_DECL;
3139 field = TREE_CHAIN (field))
3143 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
3145 /* Check that the first field is valid for returning in a register. */
3147 /* ... Floats are not allowed */
3148 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3151 /* ... Aggregates that are not themselves valid for returning in
3152 a register are not allowed. */
3153 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3156 /* Now check the remaining fields, if any. Only bitfields are allowed,
3157 since they are not addressable. */
3158 for (field = TREE_CHAIN (field);
3160 field = TREE_CHAIN (field))
3162 if (TREE_CODE (field) != FIELD_DECL)
3165 if (!DECL_BIT_FIELD_TYPE (field))
3172 if (TREE_CODE (type) == UNION_TYPE)
3176 /* Unions can be returned in registers if every element is
3177 integral, or can be returned in an integer register. */
3178 for (field = TYPE_FIELDS (type);
3180 field = TREE_CHAIN (field))
3182 if (TREE_CODE (field) != FIELD_DECL)
3185 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3188 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3194 #endif /* not ARM_WINCE */
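/* Summarizing the APCS rules above (illustrative): "struct { int x; }"
   is integer-like and is returned in r0; "struct { int x, y; }" is
   larger than a word and "struct { float f; }" starts with a float,
   so both are returned in memory.  */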
3196 /* Return all other types in memory. */
3200 /* Indicate whether or not words of a double are in big-endian order. */
3203 arm_float_words_big_endian (void)
3205 if (TARGET_MAVERICK)
3208 /* For FPA, float words are always big-endian. For VFP, float words
3209 follow the memory system mode. */
3217 return (TARGET_BIG_END ? 1 : 0);
3222 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3223 for a call to a function whose data type is FNTYPE.
3224 For a library call, FNTYPE is NULL. */
3226 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
3227 rtx libname ATTRIBUTE_UNUSED,
3228 tree fndecl ATTRIBUTE_UNUSED)
3230 /* On the ARM, the offset starts at 0. */
3232 pcum->iwmmxt_nregs = 0;
3233 pcum->can_split = true;
3235 /* Varargs vectors are treated the same as long long.
3236 named_count avoids having to change the way arm handles 'named' */
3237 pcum->named_count = 0;
3240 if (TARGET_REALLY_IWMMXT && fntype)
3244 for (fn_arg = TYPE_ARG_TYPES (fntype);
3246 fn_arg = TREE_CHAIN (fn_arg))
3247 pcum->named_count += 1;
3249 if (! pcum->named_count)
3250 pcum->named_count = INT_MAX;
3255 /* Return true if mode/type need doubleword alignment. */
3257 arm_needs_doubleword_align (enum machine_mode mode, tree type)
3259 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
3260 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
3264 /* Determine where to put an argument to a function.
3265 Value is zero to push the argument on the stack,
3266 or a hard register in which to store the argument.
3268 MODE is the argument's machine mode.
3269 TYPE is the data type of the argument (as a tree).
3270 This is null for libcalls where that information may not be available.
3272 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3273 the preceding args and about the function being called.
3274 NAMED is nonzero if this argument is a named parameter
3275 (otherwise it is an extra parameter matching an ellipsis). */
3278 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3279 tree type, int named)
3283 /* Varargs vectors are treated the same as long long.
3284 named_count avoids having to change the way arm handles 'named' */
3285 if (TARGET_IWMMXT_ABI
3286 && arm_vector_mode_supported_p (mode)
3287 && pcum->named_count > pcum->nargs + 1)
3289 if (pcum->iwmmxt_nregs <= 9)
3290 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
3293 pcum->can_split = false;
3298 /* Put doubleword aligned quantities in even register pairs. */
3300 && ARM_DOUBLEWORD_ALIGN
3301 && arm_needs_doubleword_align (mode, type))
3304 if (mode == VOIDmode)
3305 /* Pick an arbitrary value for operand 2 of the call insn. */
3308 /* Only allow splitting an arg between regs and memory if all preceding
3309 args were allocated to regs. For args passed by reference we only count
3310 the reference pointer. */
3311 if (pcum->can_split)
3314 nregs = ARM_NUM_REGS2 (mode, type);
3316 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
3319 return gen_rtx_REG (mode, pcum->nregs);
3323 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3324 tree type, bool named ATTRIBUTE_UNUSED)
3326 int nregs = pcum->nregs;
3328 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3331 if (NUM_ARG_REGS > nregs
3332 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3334 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
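/* Illustrative example: a long long argument whose first free register
   is r3 is split; four bytes travel in r3 and the remaining four on
   the stack, so this returns 4.  */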
3339 /* Variable sized types are passed by reference. This is a GCC
3340 extension to the ARM ABI. */
3343 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3344 enum machine_mode mode ATTRIBUTE_UNUSED,
3345 const_tree type, bool named ATTRIBUTE_UNUSED)
3347 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3350 /* Encode the current state of the #pragma [no_]long_calls. */
3353 OFF, /* No #pragma [no_]long_calls is in effect. */
3354 LONG, /* #pragma long_calls is in effect. */
3355 SHORT /* #pragma no_long_calls is in effect. */
3358 static arm_pragma_enum arm_pragma_long_calls = OFF;
3361 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3363 arm_pragma_long_calls = LONG;
3367 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3369 arm_pragma_long_calls = SHORT;
3373 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3375 arm_pragma_long_calls = OFF;
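/* Typical use (illustrative):

	#pragma long_calls
	void far_away (void);
	#pragma long_calls_off

   FAR_AWAY's type picks up the long_call attribute via
   arm_set_default_type_attributes below.  */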
3378 /* Table of machine attributes. */
3379 const struct attribute_spec arm_attribute_table[] =
3381 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3382 /* Function calls made to this symbol must be done indirectly, because
3383 it may lie outside of the 26-bit addressing range of a normal function call. */
3385 { "long_call", 0, 0, false, true, true, NULL },
3386 /* Whereas these functions are always known to reside within the 26-bit
3387 addressing range. */
3388 { "short_call", 0, 0, false, true, true, NULL },
3389 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3390 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3391 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3392 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3394 /* ARM/PE has three new attributes:
3396 dllexport - for exporting a function/variable that will live in a dll
3397 dllimport - for importing a function/variable from a dll
3399 Microsoft allows multiple declspecs in one __declspec, separating
3400 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
3403 { "dllimport", 0, 0, true, false, false, NULL },
3404 { "dllexport", 0, 0, true, false, false, NULL },
3405 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3406 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3407 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3408 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3409 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3411 { NULL, 0, 0, false, false, false, NULL }
3414 /* Handle an attribute requiring a FUNCTION_DECL;
3415 arguments as in struct attribute_spec.handler. */
3417 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3418 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3420 if (TREE_CODE (*node) != FUNCTION_DECL)
3422 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3424 *no_add_attrs = true;
3430 /* Handle an "interrupt" or "isr" attribute;
3431 arguments as in struct attribute_spec.handler. */
3433 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3438 if (TREE_CODE (*node) != FUNCTION_DECL)
3440 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3442 *no_add_attrs = true;
3444 /* FIXME: the argument if any is checked for type attributes;
3445 should it be checked for decl ones? */
3449 if (TREE_CODE (*node) == FUNCTION_TYPE
3450 || TREE_CODE (*node) == METHOD_TYPE)
3452 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3454 warning (OPT_Wattributes, "%qE attribute ignored",
3456 *no_add_attrs = true;
3459 else if (TREE_CODE (*node) == POINTER_TYPE
3460 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3461 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3462 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3464 *node = build_variant_type_copy (*node);
3465 TREE_TYPE (*node) = build_type_attribute_variant
3467 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3468 *no_add_attrs = true;
3472 /* Possibly pass this attribute on from the type to a decl. */
3473 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3474 | (int) ATTR_FLAG_FUNCTION_NEXT
3475 | (int) ATTR_FLAG_ARRAY_NEXT))
3477 *no_add_attrs = true;
3478 return tree_cons (name, args, NULL_TREE);
3482 warning (OPT_Wattributes, "%qE attribute ignored",
3491 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3492 /* Handle the "notshared" attribute. This attribute is another way of
3493 requesting hidden visibility. ARM's compiler supports
3494 "__declspec(notshared)"; we support the same thing via an
3498 arm_handle_notshared_attribute (tree *node,
3499 tree name ATTRIBUTE_UNUSED,
3500 tree args ATTRIBUTE_UNUSED,
3501 int flags ATTRIBUTE_UNUSED,
3504 tree decl = TYPE_NAME (*node);
3508 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3509 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3510 *no_add_attrs = false;
3516 /* Return 0 if the attributes for two types are incompatible, 1 if they
3517 are compatible, and 2 if they are nearly compatible (which causes a
3518 warning to be generated). */
3520 arm_comp_type_attributes (const_tree type1, const_tree type2)
3524 /* Check for mismatch of non-default calling convention. */
3525 if (TREE_CODE (type1) != FUNCTION_TYPE)
3528 /* Check for mismatched call attributes. */
3529 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3530 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3531 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3532 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3534 /* Only bother to check if an attribute is defined. */
3535 if (l1 | l2 | s1 | s2)
3537 /* If one type has an attribute, the other must have the same attribute. */
3538 if ((l1 != l2) || (s1 != s2))
3541 /* Disallow mixed attributes. */
3542 if ((l1 & s2) || (l2 & s1))
3546 /* Check for mismatched ISR attribute. */
3547 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3549 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3550 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3552 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3559 /* Assign default attributes to a newly defined type. This is used to
3560 set short_call/long_call attributes for function types of
3561 functions defined inside corresponding #pragma scopes. */
3563 arm_set_default_type_attributes (tree type)
3565 /* Add __attribute__ ((long_call)) to all functions when inside
3566 #pragma long_calls, or __attribute__ ((short_call)) when inside
3567 #pragma no_long_calls. */
3568 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3570 tree type_attr_list, attr_name;
3571 type_attr_list = TYPE_ATTRIBUTES (type);
3573 if (arm_pragma_long_calls == LONG)
3574 attr_name = get_identifier ("long_call");
3575 else if (arm_pragma_long_calls == SHORT)
3576 attr_name = get_identifier ("short_call");
3580 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3581 TYPE_ATTRIBUTES (type) = type_attr_list;
3585 /* Return true if DECL is known to be linked into section SECTION. */
3588 arm_function_in_section_p (tree decl, section *section)
3590 /* We can only be certain about functions defined in the same
3591 compilation unit. */
3592 if (!TREE_STATIC (decl))
3595 /* Make sure that DECL always binds to the definition in this
3596 compilation unit. */
3597 if (!targetm.binds_local_p (decl))
3600 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3601 if (!DECL_SECTION_NAME (decl))
3603 /* Make sure that we will not create a unique section for DECL. */
3604 if (flag_function_sections || DECL_ONE_ONLY (decl))
3608 return function_section (decl) == section;
3611 /* Return nonzero if a 32-bit "long_call" should be generated for
3612 a call from the current function to DECL. We generate a long_call if the function:
3615 a. has an __attribute__((long_call))
3616 or b. is within the scope of a #pragma long_calls
3617 or c. the -mlong-calls command line switch has been specified
3619 However we do not generate a long call if the function:
3621 d. has an __attribute__ ((short_call))
3622 or e. is inside the scope of a #pragma no_long_calls
3623 or f. is defined in the same section as the current function. */
3626 arm_is_long_call_p (tree decl)
3631 return TARGET_LONG_CALLS;
3633 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3634 if (lookup_attribute ("short_call", attrs))
3637 /* For "f", be conservative, and only cater for cases in which the
3638 whole of the current function is placed in the same section. */
3639 if (!flag_reorder_blocks_and_partition
3640 && TREE_CODE (decl) == FUNCTION_DECL
3641 && arm_function_in_section_p (decl, current_function_section ()))
3644 if (lookup_attribute ("long_call", attrs))
3647 return TARGET_LONG_CALLS;
3650 /* Return nonzero if it is ok to make a tail-call to DECL. */
3652 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3654 unsigned long func_type;
3656 if (cfun->machine->sibcall_blocked)
3659 /* Never tailcall something for which we have no decl, or if we
3660 are in Thumb mode. */
3661 if (decl == NULL || TARGET_THUMB)
3664 /* The PIC register is live on entry to VxWorks PLT entries, so we
3665 must make the call before restoring the PIC register. */
3666 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3669 /* Cannot tail-call to long calls, since these are out of range of
3670 a branch instruction. */
3671 if (arm_is_long_call_p (decl))
3674 /* If we are interworking and the function is not declared static
3675 then we can't tail-call it unless we know that it exists in this
3676 compilation unit (since it might be a Thumb routine). */
3677 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3680 func_type = arm_current_func_type ();
3681 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3682 if (IS_INTERRUPT (func_type))
3685 /* Never tailcall if function may be called with a misaligned SP. */
3686 if (IS_STACKALIGN (func_type))
3689 /* Everything else is ok. */
3694 /* Addressing mode support functions. */
3696 /* Return nonzero if X is a legitimate immediate operand when compiling
3697 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3699 legitimate_pic_operand_p (rtx x)
3701 if (GET_CODE (x) == SYMBOL_REF
3702 || (GET_CODE (x) == CONST
3703 && GET_CODE (XEXP (x, 0)) == PLUS
3704 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3710 /* Record that the current function needs a PIC register. Initialize
3711 cfun->machine->pic_reg if we have not already done so. */
3714 require_pic_register (void)
3716 /* A lot of the logic here is made obscure by the fact that this
3717 routine gets called as part of the rtx cost estimation process.
3718 We don't want those calls to affect any assumptions about the real
3719 function; and further, we can't call entry_of_function() until we
3720 start the real expansion process. */
3721 if (!crtl->uses_pic_offset_table)
3723 gcc_assert (can_create_pseudo_p ());
3724 if (arm_pic_register != INVALID_REGNUM)
3726 if (!cfun->machine->pic_reg)
3727 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3729 /* Play games to avoid marking the function as needing pic
3730 if we are being called as part of the cost-estimation process. */
3732 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3733 crtl->uses_pic_offset_table = 1;
3739 if (!cfun->machine->pic_reg)
3740 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3742 /* Play games to avoid marking the function as needing pic
3743 if we are being called as part of the cost-estimation process. */
3745 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3747 crtl->uses_pic_offset_table = 1;
3750 arm_load_pic_register (0UL);
3754 /* We can be called during expansion of PHI nodes, where
3755 we can't yet emit instructions directly in the final
3756 insn stream. Queue the insns on the entry edge; they will
3757 be committed after everything else is expanded. */
3758 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
3765 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3767 if (GET_CODE (orig) == SYMBOL_REF
3768 || GET_CODE (orig) == LABEL_REF)
3770 rtx pic_ref, address;
3774 /* If this function doesn't have a pic register, create one now. */
3775 require_pic_register ();
3779 gcc_assert (can_create_pseudo_p ());
3780 reg = gen_reg_rtx (Pmode);
3786 address = gen_reg_rtx (Pmode);
3791 emit_insn (gen_pic_load_addr_arm (address, orig));
3792 else if (TARGET_THUMB2)
3793 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3794 else /* TARGET_THUMB1 */
3795 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3797 /* VxWorks does not impose a fixed gap between segments; the run-time
3798 gap can be different from the object-file gap. We therefore can't
3799 use GOTOFF unless we are absolutely sure that the symbol is in the
3800 same segment as the GOT. Unfortunately, the flexibility of linker
3801 scripts means that we can't be sure of that in general, so assume
3802 that GOTOFF is never valid on VxWorks. */
3803 if ((GET_CODE (orig) == LABEL_REF
3804 || (GET_CODE (orig) == SYMBOL_REF &&
3805 SYMBOL_REF_LOCAL_P (orig)))
3807 && !TARGET_VXWORKS_RTP)
3808 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3811 pic_ref = gen_const_mem (Pmode,
3812 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3816 insn = emit_move_insn (reg, pic_ref);
3818 /* Put a REG_EQUAL note on this insn, so that it can be optimized by the loop pass. */
3820 set_unique_reg_note (insn, REG_EQUAL, orig);
3824 else if (GET_CODE (orig) == CONST)
3828 if (GET_CODE (XEXP (orig, 0)) == PLUS
3829 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3832 /* Handle the case where we have: const (UNSPEC_TLS). */
3833 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3834 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3837 /* Handle the case where we have:
3838 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
3840 if (GET_CODE (XEXP (orig, 0)) == PLUS
3841 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
3842 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
3844 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
3850 gcc_assert (can_create_pseudo_p ());
3851 reg = gen_reg_rtx (Pmode);
3854 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3856 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3857 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3858 base == reg ? 0 : reg);
3860 if (GET_CODE (offset) == CONST_INT)
3862 /* The base register doesn't really matter, we only want to
3863 test the index for the appropriate mode. */
3864 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3866 gcc_assert (can_create_pseudo_p ());
3867 offset = force_reg (Pmode, offset);
3870 if (GET_CODE (offset) == CONST_INT)
3871 return plus_constant (base, INTVAL (offset));
3874 if (GET_MODE_SIZE (mode) > 4
3875 && (GET_MODE_CLASS (mode) == MODE_INT
3876 || TARGET_SOFT_FLOAT))
3878 emit_insn (gen_addsi3 (reg, base, offset));
3882 return gen_rtx_PLUS (Pmode, base, offset);
3889 /* Find a spare register to use during the prologue of a function. */
3892 thumb_find_work_register (unsigned long pushed_regs_mask)
3896 /* Check the argument registers first as these are call-used. The
3897 register allocation order means that sometimes r3 might be used
3898 but earlier argument registers might not, so check them all. */
3899 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3900 if (!df_regs_ever_live_p (reg))
3903 /* Before going on to check the call-saved registers we can try a couple
3904 more ways of deducing that r3 is available. The first is when we are
3905 pushing anonymous arguments onto the stack and we have less than 4
3906 registers worth of fixed arguments(*). In this case r3 will be part of
3907 the variable argument list and so we can be sure that it will be
3908 pushed right at the start of the function. Hence it will be available
3909 for the rest of the prologue.
3910 (*): ie crtl->args.pretend_args_size is greater than 0. */
3911 if (cfun->machine->uses_anonymous_args
3912 && crtl->args.pretend_args_size > 0)
3913 return LAST_ARG_REGNUM;
3915 /* The other case is when we have fixed arguments but less than 4 registers
3916 worth. In this case r3 might be used in the body of the function, but
3917 it is not being used to convey an argument into the function. In theory
3918 we could just check crtl->args.size to see how many bytes are
3919 being passed in argument registers, but it seems that it is unreliable.
3920 Sometimes it will have the value 0 when in fact arguments are being
3921 passed. (See testcase execute/20021111-1.c for an example). So we also
3922 check the args_info.nregs field as well. The problem with this field is
3923 that it makes no allowances for arguments that are passed to the
3924 function but which are not used. Hence we could miss an opportunity
3925 when a function has an unused argument in r3. But it is better to be
3926 safe than sorry. */
3927 if (! cfun->machine->uses_anonymous_args
3928 && crtl->args.size >= 0
3929 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3930 && crtl->args.info.nregs < 4)
3931 return LAST_ARG_REGNUM;
3933 /* Otherwise look for a call-saved register that is going to be pushed. */
3934 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3935 if (pushed_regs_mask & (1 << reg))
3940 /* Thumb-2 can use high regs. */
3941 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3942 if (pushed_regs_mask & (1 << reg))
3945 /* Something went wrong - thumb_compute_save_reg_mask()
3946 should have arranged for a suitable register to be pushed. */
3950 static GTY(()) int pic_labelno;
3952 /* Generate code to load the PIC register. In Thumb mode SCRATCH is a scratch register. */
3956 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3958 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
3960 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3963 gcc_assert (flag_pic);
3965 pic_reg = cfun->machine->pic_reg;
3966 if (TARGET_VXWORKS_RTP)
3968 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3969 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3970 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3972 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3974 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3975 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3979 /* We use an UNSPEC rather than a LABEL_REF because this label
3980 never appears in the code stream. */
3982 labelno = GEN_INT (pic_labelno++);
3983 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3984 l1 = gen_rtx_CONST (VOIDmode, l1);
3986 /* On the ARM the PC register contains 'dot + 8' at the time of the
3987 addition; on the Thumb it is 'dot + 4'. */
3988 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
3989 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
3991 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3995 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3996 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3998 else if (TARGET_THUMB2)
4000 /* Thumb-2 only allows very limited access to the PC. Calculate the
4001 address in a temporary register. */
4002 if (arm_pic_register != INVALID_REGNUM)
4004 pic_tmp = gen_rtx_REG (SImode,
4005 thumb_find_work_register (saved_regs));
4009 gcc_assert (can_create_pseudo_p ());
4010 pic_tmp = gen_reg_rtx (Pmode);
4013 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
4014 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
4015 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
4017 else /* TARGET_THUMB1 */
4019 if (arm_pic_register != INVALID_REGNUM
4020 && REGNO (pic_reg) > LAST_LO_REGNUM)
4022 /* We will have pushed the pic register, so we should always be
4023 able to find a work register. */
4024 pic_tmp = gen_rtx_REG (SImode,
4025 thumb_find_work_register (saved_regs));
4026 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
4027 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
4030 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
4031 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
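/* For TARGET_ARM the sequence emitted above amounts to the classic
   PIC prologue (illustrative; label names invented):

	ldr	r9, .Loff
   .Lpic:
	add	r9, pc, r9
	...
   .Loff:
	.word	_GLOBAL_OFFSET_TABLE_ - (.Lpic + 8)

   where the "+ 8" matches the pc bias applied via plus_constant.  */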
4035 /* Need to emit this whether or not we obey regdecls,
4036 since setjmp/longjmp can cause life info to screw up. */
4041 /* Return nonzero if X is valid as an ARM state addressing register. */
4043 arm_address_register_rtx_p (rtx x, int strict_p)
4047 if (GET_CODE (x) != REG)
4053 return ARM_REGNO_OK_FOR_BASE_P (regno);
4055 return (regno <= LAST_ARM_REGNUM
4056 || regno >= FIRST_PSEUDO_REGISTER
4057 || regno == FRAME_POINTER_REGNUM
4058 || regno == ARG_POINTER_REGNUM);
4061 /* Return TRUE if this rtx is the difference of a symbol and a label,
4062 and will reduce to a PC-relative relocation in the object file.
4063 Expressions like this can be left alone when generating PIC, rather
4064 than forced through the GOT. */
4066 pcrel_constant_p (rtx x)
4068 if (GET_CODE (x) == MINUS)
4069 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
4074 /* Return nonzero if X is a valid ARM state address operand. */
4076 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
4080 enum rtx_code code = GET_CODE (x);
4082 if (arm_address_register_rtx_p (x, strict_p))
4085 use_ldrd = (TARGET_LDRD
4087 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4089 if (code == POST_INC || code == PRE_DEC
4090 || ((code == PRE_INC || code == POST_DEC)
4091 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4092 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4094 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4095 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4096 && GET_CODE (XEXP (x, 1)) == PLUS
4097 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4099 rtx addend = XEXP (XEXP (x, 1), 1);
4101 /* Don't allow ldrd post-increment by register because it's hard
4102 to fix up invalid register choices. */
4104 && GET_CODE (x) == POST_MODIFY
4105 && GET_CODE (addend) == REG)
4108 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
4109 && arm_legitimate_index_p (mode, addend, outer, strict_p));
4112 /* After reload, constants split into minipools will have addresses
4113 from a LABEL_REF. */
4114 else if (reload_completed
4115 && (code == LABEL_REF
4117 && GET_CODE (XEXP (x, 0)) == PLUS
4118 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4119 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4122 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4125 else if (code == PLUS)
4127 rtx xop0 = XEXP (x, 0);
4128 rtx xop1 = XEXP (x, 1);
4130 return ((arm_address_register_rtx_p (xop0, strict_p)
4131 && GET_CODE(xop1) == CONST_INT
4132 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
4133 || (arm_address_register_rtx_p (xop1, strict_p)
4134 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
4138 /* Reload currently can't handle MINUS, so disable this for now */
4139 else if (GET_CODE (x) == MINUS)
4141 rtx xop0 = XEXP (x, 0);
4142 rtx xop1 = XEXP (x, 1);
4144 return (arm_address_register_rtx_p (xop0, strict_p)
4145 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
4149 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4150 && code == SYMBOL_REF
4151 && CONSTANT_POOL_ADDRESS_P (x)
4153 && symbol_mentioned_p (get_pool_constant (x))
4154 && ! pcrel_constant_p (get_pool_constant (x))))
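/* To make the cases above concrete, here are some ARM-state SImode
   addresses this function accepts (illustrative assembly syntax only;
   register numbers are arbitrary):

     [r0]              plain base register
     [r0], #4          post-increment
     [r0, #4095]       base + 12-bit immediate index
     [r0, r1]          base + index register
     [r0, r1, lsl #2]  base + index scaled by a power of two

   while, e.g., [r0, #4096] is rejected and must be synthesized with a
   separate address computation.  */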
4160 /* Return nonzero if X is a valid Thumb-2 address operand. */
4162 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4165 enum rtx_code code = GET_CODE (x);
4167 if (arm_address_register_rtx_p (x, strict_p))
4170 use_ldrd = (TARGET_LDRD
4172 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4174 if (code == POST_INC || code == PRE_DEC
4175 || ((code == PRE_INC || code == POST_DEC)
4176 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4177 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4179 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4180 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4181 && GET_CODE (XEXP (x, 1)) == PLUS
4182 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4184 /* Thumb-2 only has autoincrement by constant. */
4185 rtx addend = XEXP (XEXP (x, 1), 1);
4186 HOST_WIDE_INT offset;
4188 if (GET_CODE (addend) != CONST_INT)
4191       offset = INTVAL (addend);
4192 if (GET_MODE_SIZE (mode) <= 4)
4193 return (offset > -256 && offset < 256);
4195 return (use_ldrd && offset > -1024 && offset < 1024
4196 && (offset & 3) == 0);
4199 /* After reload constants split into minipools will have addresses
4200 from a LABEL_REF. */
4201 else if (reload_completed
4202 && (code == LABEL_REF
4204 && GET_CODE (XEXP (x, 0)) == PLUS
4205 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4206 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4209 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4212 else if (code == PLUS)
4214 rtx xop0 = XEXP (x, 0);
4215 rtx xop1 = XEXP (x, 1);
4217 return ((arm_address_register_rtx_p (xop0, strict_p)
4218 && thumb2_legitimate_index_p (mode, xop1, strict_p))
4219 || (arm_address_register_rtx_p (xop1, strict_p)
4220 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
4223 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4224 && code == SYMBOL_REF
4225 && CONSTANT_POOL_ADDRESS_P (x)
4227 && symbol_mentioned_p (get_pool_constant (x))
4228 && ! pcrel_constant_p (get_pool_constant (x))))
4234 /* Return nonzero if INDEX is valid for an address index operand in
4237 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
4240 HOST_WIDE_INT range;
4241 enum rtx_code code = GET_CODE (index);
4243 /* Standard coprocessor addressing modes. */
4244 if (TARGET_HARD_FLOAT
4245 && (TARGET_FPA || TARGET_MAVERICK)
4246 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4247 || (TARGET_MAVERICK && mode == DImode)))
4248 return (code == CONST_INT && INTVAL (index) < 1024
4249 && INTVAL (index) > -1024
4250 && (INTVAL (index) & 3) == 0);
4253 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4254 return (code == CONST_INT
4255 && INTVAL (index) < 1016
4256 && INTVAL (index) > -1024
4257 && (INTVAL (index) & 3) == 0);
4259 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4260 return (code == CONST_INT
4261 && INTVAL (index) < 1024
4262 && INTVAL (index) > -1024
4263 && (INTVAL (index) & 3) == 0);
4265 if (arm_address_register_rtx_p (index, strict_p)
4266 && (GET_MODE_SIZE (mode) <= 4))
4269   if (mode == DImode || mode == DFmode)
4270     {
4271       if (code == CONST_INT)
4272 	{
4273 	  HOST_WIDE_INT val = INTVAL (index);
4275 	  if (TARGET_LDRD)
4276 	    return val > -256 && val < 256;
4277 	  else
4278 	    return val > -4096 && val < 4092;
4279 	}
4281       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
4282     }
4284 if (GET_MODE_SIZE (mode) <= 4
4288 || (mode == QImode && outer == SIGN_EXTEND))))
4292 rtx xiop0 = XEXP (index, 0);
4293 rtx xiop1 = XEXP (index, 1);
4295 return ((arm_address_register_rtx_p (xiop0, strict_p)
4296 && power_of_two_operand (xiop1, SImode))
4297 || (arm_address_register_rtx_p (xiop1, strict_p)
4298 && power_of_two_operand (xiop0, SImode)));
4300 else if (code == LSHIFTRT || code == ASHIFTRT
4301 || code == ASHIFT || code == ROTATERT)
4303 rtx op = XEXP (index, 1);
4305 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4306 && GET_CODE (op) == CONST_INT
4308 && INTVAL (op) <= 31);
4312   /* For ARM v4 we may be doing a sign-extend operation during the
4313      load.  */
4314   if (arm_arch4)
4315     {
4316       if (mode == HImode
4317 	  || mode == HFmode
4318 	  || (outer == SIGN_EXTEND && mode == QImode))
4319 	range = 256;
4320       else
4321 	range = 4096;
4322     }
4323   else
4324     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
4326   return (code == CONST_INT
4327 	  && INTVAL (index) < range
4328 	  && INTVAL (index) > -range);
4329 }
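/* Worked example (illustrative): on ARMv4, HImode loads must use ldrh or
   ldrsh, whose immediate offset is only 8 bits, so range == 256 and
   [rN, #255] is the largest legal index; a SImode ldr keeps the full
   12-bit range and still accepts [rN, #4095].  */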
4331 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4332 index operand. i.e. 1, 2, 4 or 8. */
4334 thumb2_index_mul_operand (rtx op)
4338   if (GET_CODE (op) != CONST_INT)
4342 return (val == 1 || val == 2 || val == 4 || val == 8);
4345 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4347 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4349 enum rtx_code code = GET_CODE (index);
4351 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4352 /* Standard coprocessor addressing modes. */
4353 if (TARGET_HARD_FLOAT
4354 && (TARGET_FPA || TARGET_MAVERICK)
4355 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4356 || (TARGET_MAVERICK && mode == DImode)))
4357 return (code == CONST_INT && INTVAL (index) < 1024
4358 && INTVAL (index) > -1024
4359 && (INTVAL (index) & 3) == 0);
4361 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4363 /* For DImode assume values will usually live in core regs
4364 and only allow LDRD addressing modes. */
4365 if (!TARGET_LDRD || mode != DImode)
4366 return (code == CONST_INT
4367 && INTVAL (index) < 1024
4368 && INTVAL (index) > -1024
4369 && (INTVAL (index) & 3) == 0);
4373 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4374 return (code == CONST_INT
4375 && INTVAL (index) < 1016
4376 && INTVAL (index) > -1024
4377 && (INTVAL (index) & 3) == 0);
4379 if (arm_address_register_rtx_p (index, strict_p)
4380 && (GET_MODE_SIZE (mode) <= 4))
4383 if (mode == DImode || mode == DFmode)
4385 if (code == CONST_INT)
4387 HOST_WIDE_INT val = INTVAL (index);
4388 /* ??? Can we assume ldrd for thumb2? */
4389 /* Thumb-2 ldrd only has reg+const addressing modes. */
4390 /* ldrd supports offsets of +-1020.
4391 However the ldr fallback does not. */
4392 return val > -256 && val < 256 && (val & 3) == 0;
4400 rtx xiop0 = XEXP (index, 0);
4401 rtx xiop1 = XEXP (index, 1);
4403 return ((arm_address_register_rtx_p (xiop0, strict_p)
4404 && thumb2_index_mul_operand (xiop1))
4405 || (arm_address_register_rtx_p (xiop1, strict_p)
4406 && thumb2_index_mul_operand (xiop0)));
4408 else if (code == ASHIFT)
4410 rtx op = XEXP (index, 1);
4412 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4413 && GET_CODE (op) == CONST_INT
4415 && INTVAL (op) <= 3);
4418 return (code == CONST_INT
4419 && INTVAL (index) < 4096
4420 && INTVAL (index) > -256);
4423 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4425 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4429 if (GET_CODE (x) != REG)
4435 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4437 return (regno <= LAST_LO_REGNUM
4438 || regno > LAST_VIRTUAL_REGISTER
4439 || regno == FRAME_POINTER_REGNUM
4440 || (GET_MODE_SIZE (mode) >= 4
4441 && (regno == STACK_POINTER_REGNUM
4442 || regno >= FIRST_PSEUDO_REGISTER
4443 || x == hard_frame_pointer_rtx
4444 || x == arg_pointer_rtx)));
4447 /* Return nonzero if x is a legitimate index register. This is the case
4448 for any base register that can access a QImode object. */
4450 thumb1_index_register_rtx_p (rtx x, int strict_p)
4452 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4455 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4457 The AP may be eliminated to either the SP or the FP, so we use the
4458 least common denominator, e.g. SImode, and offsets from 0 to 64.
4460 ??? Verify whether the above is the right approach.
4462 ??? Also, the FP may be eliminated to the SP, so perhaps that
4463 needs special handling also.
4465 ??? Look at how the mips16 port solves this problem. It probably uses
4466 better ways to solve some of these problems.
4468 Although it is not incorrect, we don't accept QImode and HImode
4469 addresses based on the frame pointer or arg pointer until the
4470 reload pass starts. This is so that eliminating such addresses
4471 into stack based ones won't produce impossible code. */
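/* For instance (illustrative RTL), before reload a QImode reference such
   as (mem:QI (plus (reg AP) (const_int 9))) is rejected by the code below,
   since once the arg pointer has been eliminated to SP+offset the
   resulting address might need an offset that no Thumb-1 byte load or
   store can encode.  */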
4473 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4475 /* ??? Not clear if this is right. Experiment. */
4476 if (GET_MODE_SIZE (mode) < 4
4477 && !(reload_in_progress || reload_completed)
4478 && (reg_mentioned_p (frame_pointer_rtx, x)
4479 || reg_mentioned_p (arg_pointer_rtx, x)
4480 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4481 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4482 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4483 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4486 /* Accept any base register. SP only in SImode or larger. */
4487 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4490 /* This is PC relative data before arm_reorg runs. */
4491 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4492 && GET_CODE (x) == SYMBOL_REF
4493 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4496 /* This is PC relative data after arm_reorg runs. */
4497 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
4499 && (GET_CODE (x) == LABEL_REF
4500 || (GET_CODE (x) == CONST
4501 && GET_CODE (XEXP (x, 0)) == PLUS
4502 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4503 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4506 /* Post-inc indexing only supported for SImode and larger. */
4507 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4508 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4511 else if (GET_CODE (x) == PLUS)
4513 /* REG+REG address can be any two index registers. */
4514 /* We disallow FRAME+REG addressing since we know that FRAME
4515 will be replaced with STACK, and SP relative addressing only
4516 permits SP+OFFSET. */
4517 if (GET_MODE_SIZE (mode) <= 4
4518 && XEXP (x, 0) != frame_pointer_rtx
4519 && XEXP (x, 1) != frame_pointer_rtx
4520 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4521 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4524 /* REG+const has 5-7 bit offset for non-SP registers. */
4525 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4526 || XEXP (x, 0) == arg_pointer_rtx)
4527 && GET_CODE (XEXP (x, 1)) == CONST_INT
4528 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4531 /* REG+const has 10-bit offset for SP, but only SImode and
4532 	 larger are supported.  */
4533 /* ??? Should probably check for DI/DFmode overflow here
4534 just like GO_IF_LEGITIMATE_OFFSET does. */
4535 else if (GET_CODE (XEXP (x, 0)) == REG
4536 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4537 && GET_MODE_SIZE (mode) >= 4
4538 && GET_CODE (XEXP (x, 1)) == CONST_INT
4539 && INTVAL (XEXP (x, 1)) >= 0
4540 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4541 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4544 else if (GET_CODE (XEXP (x, 0)) == REG
4545 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4546 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4547 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4548 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4549 && GET_MODE_SIZE (mode) >= 4
4550 && GET_CODE (XEXP (x, 1)) == CONST_INT
4551 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4555 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4556 && GET_MODE_SIZE (mode) == 4
4557 && GET_CODE (x) == SYMBOL_REF
4558 && CONSTANT_POOL_ADDRESS_P (x)
4560 && symbol_mentioned_p (get_pool_constant (x))
4561 && ! pcrel_constant_p (get_pool_constant (x))))
4567 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4568 instruction of mode MODE. */
4570 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4571 {
4572   switch (GET_MODE_SIZE (mode))
4573     {
4574     case 1:
4575       return val >= 0 && val < 32;
4577     case 2:
4578       return val >= 0 && val < 64 && (val & 1) == 0;
4580     default:
4581       return (val >= 0
4582 	      && (val + GET_MODE_SIZE (mode)) <= 128
4583 	      && (val & 3) == 0);
4584     }
4585 }
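/* Worked examples, derived from the switch above (illustrative): QImode
   accepts byte offsets 0-31, HImode accepts even offsets 0-62, and SImode
   or larger accepts word-aligned offsets with offset + size <= 128, so 124
   is the largest valid SImode offset (124 + 4 == 128).  */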
4588 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
4591 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
4592 else if (TARGET_THUMB2)
4593 return thumb2_legitimate_address_p (mode, x, strict_p);
4594 else /* if (TARGET_THUMB1) */
4595 return thumb1_legitimate_address_p (mode, x, strict_p);
4598 /* Build the SYMBOL_REF for __tls_get_addr. */
4600 static GTY(()) rtx tls_get_addr_libfunc;
4603 get_tls_get_addr (void)
4605 if (!tls_get_addr_libfunc)
4606 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4607 return tls_get_addr_libfunc;
4608 }
4610 static rtx
4611 arm_load_tp (rtx target)
4612 {
4613   if (!target)
4614     target = gen_reg_rtx (SImode);
4616   if (TARGET_HARD_TP)
4617     {
4618       /* Can return in any reg.  */
4619       emit_insn (gen_load_tp_hard (target));
4620     }
4621   else
4622     {
4623       /* Always returned in r0.  Immediately copy the result into a pseudo,
4624 	 otherwise other uses of r0 (e.g. setting up function arguments) may
4625 	 clobber the value.  */
4627       rtx tmp;
4629       emit_insn (gen_load_tp_soft ());
4631       tmp = gen_rtx_REG (SImode, 0);
4632       emit_move_insn (target, tmp);
4633     }
4634   return target;
4635 }
4638 load_tls_operand (rtx x, rtx reg)
4642 if (reg == NULL_RTX)
4643 reg = gen_reg_rtx (SImode);
4645 tmp = gen_rtx_CONST (SImode, x);
4647 emit_move_insn (reg, tmp);
4653 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4655 rtx insns, label, labelno, sum;
4659 labelno = GEN_INT (pic_labelno++);
4660 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4661 label = gen_rtx_CONST (VOIDmode, label);
4663 sum = gen_rtx_UNSPEC (Pmode,
4664 gen_rtvec (4, x, GEN_INT (reloc), label,
4665 GEN_INT (TARGET_ARM ? 8 : 4)),
4667 reg = load_tls_operand (sum, reg);
4670 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4671 else if (TARGET_THUMB2)
4674 /* Thumb-2 only allows very limited access to the PC. Calculate
4675 the address in a temporary register. */
4676 tmp = gen_reg_rtx (SImode);
4677 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4678       emit_insn (gen_addsi3 (reg, reg, tmp));
4680 else /* TARGET_THUMB1 */
4681 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4683 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4684 Pmode, 1, reg, Pmode);
4686 insns = get_insns ();
4693 legitimize_tls_address (rtx x, rtx reg)
4695 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4696 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4700 case TLS_MODEL_GLOBAL_DYNAMIC:
4701 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4702 dest = gen_reg_rtx (Pmode);
4703 emit_libcall_block (insns, dest, ret, x);
4706 case TLS_MODEL_LOCAL_DYNAMIC:
4707 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4709 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4710 share the LDM result with other LD model accesses. */
4711 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4713 dest = gen_reg_rtx (Pmode);
4714 emit_libcall_block (insns, dest, ret, eqv);
4716 /* Load the addend. */
4717 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4719 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4720 return gen_rtx_PLUS (Pmode, dest, addend);
4722 case TLS_MODEL_INITIAL_EXEC:
4723 labelno = GEN_INT (pic_labelno++);
4724 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4725 label = gen_rtx_CONST (VOIDmode, label);
4726 sum = gen_rtx_UNSPEC (Pmode,
4727 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4728 GEN_INT (TARGET_ARM ? 8 : 4)),
4730 reg = load_tls_operand (sum, reg);
4733 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4734 else if (TARGET_THUMB2)
4737 /* Thumb-2 only allows very limited access to the PC. Calculate
4738 the address in a temporary register. */
4739 tmp = gen_reg_rtx (SImode);
4740 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4741 	  emit_insn (gen_addsi3 (reg, reg, tmp));
4742 emit_move_insn (reg, gen_const_mem (SImode, reg));
4746 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4747 emit_move_insn (reg, gen_const_mem (SImode, reg));
4750 tp = arm_load_tp (NULL_RTX);
4752 return gen_rtx_PLUS (Pmode, tp, reg);
4754 case TLS_MODEL_LOCAL_EXEC:
4755 tp = arm_load_tp (NULL_RTX);
4757 reg = gen_rtx_UNSPEC (Pmode,
4758 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4760 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4762 return gen_rtx_PLUS (Pmode, tp, reg);
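/* Summary of the sequences built above, assuming the usual ARM TLS
   relocations (illustrative): global-dynamic calls __tls_get_addr on a
   PC-relative TLS_GD32 argument; local-dynamic makes the same call with
   TLS_LDM32 and then adds a per-symbol TLS_LDO32 addend to the shared
   result; initial-exec loads a TLS_IE32 offset through a PC-relative,
   GOT-indirect load and adds it to the thread pointer; local-exec simply
   adds the link-time TLS_LE32 offset to the thread pointer.  */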
4769 /* Try machine-dependent ways of modifying an illegitimate address
4770 to be legitimate. If we find one, return the new, valid address. */
4772 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4776 /* TODO: legitimize_address for Thumb2. */
4779 return thumb_legitimize_address (x, orig_x, mode);
4782 if (arm_tls_symbol_p (x))
4783 return legitimize_tls_address (x, NULL_RTX);
4785 if (GET_CODE (x) == PLUS)
4787 rtx xop0 = XEXP (x, 0);
4788 rtx xop1 = XEXP (x, 1);
4790 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4791 xop0 = force_reg (SImode, xop0);
4793 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4794 xop1 = force_reg (SImode, xop1);
4796 if (ARM_BASE_REGISTER_RTX_P (xop0)
4797 && GET_CODE (xop1) == CONST_INT)
4799 HOST_WIDE_INT n, low_n;
4803 /* VFP addressing modes actually allow greater offsets, but for
4804 now we just stick with the lowest common denominator. */
4806 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4818 low_n = ((mode) == TImode ? 0
4819 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4823 base_reg = gen_reg_rtx (SImode);
4824 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4825 emit_move_insn (base_reg, val);
4826 x = plus_constant (base_reg, low_n);
4828 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4829 x = gen_rtx_PLUS (SImode, xop0, xop1);
4832 /* XXX We don't allow MINUS any more -- see comment in
4833 arm_legitimate_address_outer_p (). */
4834 else if (GET_CODE (x) == MINUS)
4836 rtx xop0 = XEXP (x, 0);
4837 rtx xop1 = XEXP (x, 1);
4839 if (CONSTANT_P (xop0))
4840 xop0 = force_reg (SImode, xop0);
4842 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4843 xop1 = force_reg (SImode, xop1);
4845 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4846 x = gen_rtx_MINUS (SImode, xop0, xop1);
4849 /* Make sure to take full advantage of the pre-indexed addressing mode
4850 with absolute addresses which often allows for the base register to
4851 be factorized for multiple adjacent memory references, and it might
4852      even allow for the mini pool to be avoided entirely.  */
4853 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4856 HOST_WIDE_INT mask, base, index;
4859 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4860 	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
4861 hope that arm_gen_constant will enable ldrb to use more bits. */
4862 bits = (mode == SImode) ? 12 : 8;
4863 mask = (1 << bits) - 1;
4864 base = INTVAL (x) & ~mask;
4865 index = INTVAL (x) & mask;
4866 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4868 /* It'll most probably be more efficient to generate the base
4869 with more bits set and use a negative index instead. */
4873 base_reg = force_reg (SImode, GEN_INT (base));
4874 x = plus_constant (base_reg, index);
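/* Worked example (illustrative): for a SImode reference to the constant
   address 0x4567, bits == 12 and mask == 0xfff, so base == 0x4000 and
   index == 0x567; the base is loaded into a register once and each access
   becomes a single ldr with an immediate offset, e.g. ldr rD, [rB, #0x567].  */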
4879 /* We need to find and carefully transform any SYMBOL and LABEL
4880 references; so go back to the original address expression. */
4881 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4883 if (new_x != orig_x)
4891 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4892 to be legitimate. If we find one, return the new, valid address. */
4894 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4896 if (arm_tls_symbol_p (x))
4897 return legitimize_tls_address (x, NULL_RTX);
4899 if (GET_CODE (x) == PLUS
4900 && GET_CODE (XEXP (x, 1)) == CONST_INT
4901 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4902 || INTVAL (XEXP (x, 1)) < 0))
4904 rtx xop0 = XEXP (x, 0);
4905 rtx xop1 = XEXP (x, 1);
4906 HOST_WIDE_INT offset = INTVAL (xop1);
4908 /* Try and fold the offset into a biasing of the base register and
4909 then offsetting that. Don't do this when optimizing for space
4910 since it can cause too many CSEs. */
4911 if (optimize_size && offset >= 0
4912 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4914 HOST_WIDE_INT delta;
4917 delta = offset - (256 - GET_MODE_SIZE (mode));
4918 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4919 delta = 31 * GET_MODE_SIZE (mode);
4921 delta = offset & (~31 * GET_MODE_SIZE (mode));
4923 xop0 = force_operand (plus_constant (xop0, offset - delta),
4925 x = plus_constant (xop0, delta);
4927 else if (offset < 0 && offset > -256)
4928 	/* Small negative offsets are best done with a subtract before the
4929 	   dereference, forcing these into a register normally takes two
4930 	   insns.  */
4931 x = force_operand (x, NULL_RTX);
4934 /* For the remaining cases, force the constant into a register. */
4935 xop1 = force_reg (SImode, xop1);
4936 x = gen_rtx_PLUS (SImode, xop0, xop1);
4939 else if (GET_CODE (x) == PLUS
4940 && s_register_operand (XEXP (x, 1), SImode)
4941 && !s_register_operand (XEXP (x, 0), SImode))
4943 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4945 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4950 /* We need to find and carefully transform any SYMBOL and LABEL
4951 references; so go back to the original address expression. */
4952 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4954 if (new_x != orig_x)
4962 thumb_legitimize_reload_address (rtx *x_p,
4963 enum machine_mode mode,
4964 int opnum, int type,
4965 int ind_levels ATTRIBUTE_UNUSED)
4969 if (GET_CODE (x) == PLUS
4970 && GET_MODE_SIZE (mode) < 4
4971 && REG_P (XEXP (x, 0))
4972 && XEXP (x, 0) == stack_pointer_rtx
4973 && GET_CODE (XEXP (x, 1)) == CONST_INT
4974 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4979 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4980 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4984 /* If both registers are hi-regs, then it's better to reload the
4985 entire expression rather than each register individually. That
4986 only requires one reload register rather than two. */
4987 if (GET_CODE (x) == PLUS
4988 && REG_P (XEXP (x, 0))
4989 && REG_P (XEXP (x, 1))
4990 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4991 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4996 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4997 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
5004 /* Test for various thread-local symbols. */
5006 /* Return TRUE if X is a thread-local symbol. */
5009 arm_tls_symbol_p (rtx x)
5011 if (! TARGET_HAVE_TLS)
5014 if (GET_CODE (x) != SYMBOL_REF)
5017 return SYMBOL_REF_TLS_MODEL (x) != 0;
5020 /* Helper for arm_tls_referenced_p. */
5023 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
5025 if (GET_CODE (*x) == SYMBOL_REF)
5026 return SYMBOL_REF_TLS_MODEL (*x) != 0;
5028 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5029 TLS offsets, not real symbol references. */
5030 if (GET_CODE (*x) == UNSPEC
5031 && XINT (*x, 1) == UNSPEC_TLS)
5037 /* Return TRUE if X contains any TLS symbol references. */
5040 arm_tls_referenced_p (rtx x)
5042 if (! TARGET_HAVE_TLS)
5045 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
5048 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
5051 arm_cannot_force_const_mem (rtx x)
5055 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
5057 split_const (x, &base, &offset);
5058 if (GET_CODE (base) == SYMBOL_REF
5059 && !offset_within_block_p (base, INTVAL (offset)))
5062 return arm_tls_referenced_p (x);
5065 #define REG_OR_SUBREG_REG(X) \
5066 (GET_CODE (X) == REG \
5067 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
5069 #define REG_OR_SUBREG_RTX(X) \
5070 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
5072 #ifndef COSTS_N_INSNS
5073 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
5074 #endif
5075 static inline int
5076 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
5078 enum machine_mode mode = GET_MODE (x);
5091 return COSTS_N_INSNS (1);
5094 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5097 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
5104 return COSTS_N_INSNS (2) + cycles;
5106 return COSTS_N_INSNS (1) + 16;
5109       return (COSTS_N_INSNS (1)
5110 	      + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
5111 		     + (GET_CODE (SET_DEST (x)) == MEM)));
5116 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
5118 if (thumb_shiftable_const (INTVAL (x)))
5119 return COSTS_N_INSNS (2);
5120 return COSTS_N_INSNS (3);
5122 else if ((outer == PLUS || outer == COMPARE)
5123 && INTVAL (x) < 256 && INTVAL (x) > -256)
5125 else if (outer == AND
5126 && INTVAL (x) < 256 && INTVAL (x) >= -256)
5127 return COSTS_N_INSNS (1);
5128 else if (outer == ASHIFT || outer == ASHIFTRT
5129 || outer == LSHIFTRT)
5131 return COSTS_N_INSNS (2);
5137 return COSTS_N_INSNS (3);
5155 /* XXX another guess. */
5156 /* Memory costs quite a lot for the first word, but subsequent words
5157 load at the equivalent of a single insn each. */
5158 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
5159 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5164 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5169 /* XXX still guessing. */
5170 switch (GET_MODE (XEXP (x, 0)))
5173 return (1 + (mode == DImode ? 4 : 0)
5174 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5177 return (4 + (mode == DImode ? 4 : 0)
5178 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5181 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5193 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
5195 enum machine_mode mode = GET_MODE (x);
5196 enum rtx_code subcode;
5198 enum rtx_code code = GET_CODE (x);
5205 /* Memory costs quite a lot for the first word, but subsequent words
5206 load at the equivalent of a single insn each. */
5207 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
5214 if (TARGET_HARD_FLOAT && mode == SFmode)
5215 *total = COSTS_N_INSNS (2);
5216 else if (TARGET_HARD_FLOAT && mode == DFmode)
5217 *total = COSTS_N_INSNS (4);
5219 *total = COSTS_N_INSNS (20);
5223 if (GET_CODE (XEXP (x, 1)) == REG)
5224 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
5225 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5226 *total = rtx_cost (XEXP (x, 1), code, speed);
5232 *total += COSTS_N_INSNS (4);
5237 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
5238 *total += rtx_cost (XEXP (x, 0), code, speed);
5241 *total += COSTS_N_INSNS (3);
5245 *total += COSTS_N_INSNS (1);
5246 /* Increase the cost of complex shifts because they aren't any faster,
5247 and reduce dual issue opportunities. */
5248 if (arm_tune_cortex_a9
5249 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
5257 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5259 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5260 *total = COSTS_N_INSNS (1);
5262 *total = COSTS_N_INSNS (20);
5265 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5266 /* Thumb2 does not have RSB, so all arguments must be
5267 registers (subtracting a constant is canonicalized as
5268 addition of the negated constant). */
5274 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5275 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5276 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5278 *total += rtx_cost (XEXP (x, 1), code, speed);
5282 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5283 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
5285 *total += rtx_cost (XEXP (x, 0), code, speed);
5292 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5294 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5296 *total = COSTS_N_INSNS (1);
5297 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5298 && arm_const_double_rtx (XEXP (x, 0)))
5300 *total += rtx_cost (XEXP (x, 1), code, speed);
5304 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5305 && arm_const_double_rtx (XEXP (x, 1)))
5307 *total += rtx_cost (XEXP (x, 0), code, speed);
5313 *total = COSTS_N_INSNS (20);
5317 *total = COSTS_N_INSNS (1);
5318 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5319 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5321 *total += rtx_cost (XEXP (x, 1), code, speed);
5325 subcode = GET_CODE (XEXP (x, 1));
5326 if (subcode == ASHIFT || subcode == ASHIFTRT
5327 || subcode == LSHIFTRT
5328 || subcode == ROTATE || subcode == ROTATERT)
5330 *total += rtx_cost (XEXP (x, 0), code, speed);
5331 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5335 /* A shift as a part of RSB costs no more than RSB itself. */
5336 if (GET_CODE (XEXP (x, 0)) == MULT
5337 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5339 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
5340 *total += rtx_cost (XEXP (x, 1), code, speed);
5345 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
5347 *total += rtx_cost (XEXP (x, 0), code, speed);
5348 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5352 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
5353 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
5355 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5356 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
5357 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
5358 *total += COSTS_N_INSNS (1);
5366 if (code == PLUS && arm_arch6 && mode == SImode
5367 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5368 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5370 *total = COSTS_N_INSNS (1);
5371 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
5373 *total += rtx_cost (XEXP (x, 1), code, speed);
5377 /* MLA: All arguments must be registers. We filter out
5378 multiplication by a power of two, so that we fall down into
5380 if (GET_CODE (XEXP (x, 0)) == MULT
5381 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5383 /* The cost comes from the cost of the multiply. */
5387 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5389 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5391 *total = COSTS_N_INSNS (1);
5392 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5393 && arm_const_double_rtx (XEXP (x, 1)))
5395 *total += rtx_cost (XEXP (x, 0), code, speed);
5402 *total = COSTS_N_INSNS (20);
5406 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
5407 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
5409 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
5410 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5411 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
5412 *total += COSTS_N_INSNS (1);
5418 case AND: case XOR: case IOR:
5421       /* Normally the frame registers will be split into reg+const during
5422 reload, so it is a bad idea to combine them with other instructions,
5423 since then they might not be moved outside of loops. As a compromise
5424 we allow integration with ops that have a constant as their second
5426 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
5427 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
5428 && GET_CODE (XEXP (x, 1)) != CONST_INT)
5429 	  || (REG_OR_SUBREG_REG (XEXP (x, 1))
5430 	      && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
5435 *total += COSTS_N_INSNS (2);
5436 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5437 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5439 *total += rtx_cost (XEXP (x, 0), code, speed);
5446 *total += COSTS_N_INSNS (1);
5447 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5448 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5450 *total += rtx_cost (XEXP (x, 0), code, speed);
5453 subcode = GET_CODE (XEXP (x, 0));
5454 if (subcode == ASHIFT || subcode == ASHIFTRT
5455 || subcode == LSHIFTRT
5456 || subcode == ROTATE || subcode == ROTATERT)
5458 *total += rtx_cost (XEXP (x, 1), code, speed);
5459 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5464 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5466 *total += rtx_cost (XEXP (x, 1), code, speed);
5467 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5471 if (subcode == UMIN || subcode == UMAX
5472 || subcode == SMIN || subcode == SMAX)
5474 *total = COSTS_N_INSNS (3);
5481 /* This should have been handled by the CPU specific routines. */
5485 if (arm_arch3m && mode == SImode
5486 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5487 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5488 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5489 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5490 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5491 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5493 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
5496 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
5500 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5502 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5504 *total = COSTS_N_INSNS (1);
5507 *total = COSTS_N_INSNS (2);
5513 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
5514 if (mode == SImode && code == NOT)
5516 subcode = GET_CODE (XEXP (x, 0));
5517 if (subcode == ASHIFT || subcode == ASHIFTRT
5518 || subcode == LSHIFTRT
5519 || subcode == ROTATE || subcode == ROTATERT
5521 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
5523 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5524 /* Register shifts cost an extra cycle. */
5525 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
5526 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
5535 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5537 *total = COSTS_N_INSNS (4);
5541 operand = XEXP (x, 0);
5543 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
5544 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
5545 && GET_CODE (XEXP (operand, 0)) == REG
5546 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
5547 *total += COSTS_N_INSNS (1);
5548 *total += (rtx_cost (XEXP (x, 1), code, speed)
5549 + rtx_cost (XEXP (x, 2), code, speed));
5553 if (mode == SImode && XEXP (x, 1) == const0_rtx)
5555 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5561 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5562 && mode == SImode && XEXP (x, 1) == const0_rtx)
5564 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5570 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5571 && mode == SImode && XEXP (x, 1) == const0_rtx)
5573 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5593       /* SCC insns.  Where the comparison has already been performed,
5594 	 they cost 2 instructions; otherwise they need an additional
5595 	 comparison before them.  */
5596 *total = COSTS_N_INSNS (2);
5597 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5604 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5610 *total += COSTS_N_INSNS (1);
5611 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5612 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5614 *total += rtx_cost (XEXP (x, 0), code, speed);
5618 subcode = GET_CODE (XEXP (x, 0));
5619 if (subcode == ASHIFT || subcode == ASHIFTRT
5620 || subcode == LSHIFTRT
5621 || subcode == ROTATE || subcode == ROTATERT)
5623 *total += rtx_cost (XEXP (x, 1), code, speed);
5624 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5629 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5631 *total += rtx_cost (XEXP (x, 1), code, speed);
5632 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5642 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5643 if (GET_CODE (XEXP (x, 1)) != CONST_INT
5644 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
5645 *total += rtx_cost (XEXP (x, 1), code, speed);
5649 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5651 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5653 *total = COSTS_N_INSNS (1);
5656 *total = COSTS_N_INSNS (20);
5659 *total = COSTS_N_INSNS (1);
5661 *total += COSTS_N_INSNS (3);
5665 if (GET_MODE_CLASS (mode) == MODE_INT)
5669 *total += COSTS_N_INSNS (1);
5671 if (GET_MODE (XEXP (x, 0)) != SImode)
5675 if (GET_CODE (XEXP (x, 0)) != MEM)
5676 *total += COSTS_N_INSNS (1);
5678 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5679 *total += COSTS_N_INSNS (2);
5688 if (GET_MODE_CLASS (mode) == MODE_INT)
5691 *total += COSTS_N_INSNS (1);
5693 if (GET_MODE (XEXP (x, 0)) != SImode)
5697 if (GET_CODE (XEXP (x, 0)) != MEM)
5698 *total += COSTS_N_INSNS (1);
5700 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5701 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
5708 switch (GET_MODE (XEXP (x, 0)))
5715 *total = COSTS_N_INSNS (1);
5725 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5729 if (const_ok_for_arm (INTVAL (x))
5730 || const_ok_for_arm (~INTVAL (x)))
5731 *total = COSTS_N_INSNS (1);
5733 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
5734 INTVAL (x), NULL_RTX,
5741 *total = COSTS_N_INSNS (3);
5745 *total = COSTS_N_INSNS (1);
5749 *total = COSTS_N_INSNS (1);
5750 *total += rtx_cost (XEXP (x, 0), code, speed);
5754 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
5755 *total = COSTS_N_INSNS (1);
5757 *total = COSTS_N_INSNS (4);
5761 *total = COSTS_N_INSNS (4);
5766 /* RTX costs when optimizing for size. */
5768 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5771 enum machine_mode mode = GET_MODE (x);
5774 /* XXX TBD. For now, use the standard costs. */
5775 *total = thumb1_rtx_costs (x, code, outer_code);
5779 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5783 /* A memory access costs 1 insn if the mode is small, or the address is
5784 a single register, otherwise it costs one insn per word. */
5785 if (REG_P (XEXP (x, 0)))
5786 *total = COSTS_N_INSNS (1);
5788 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5795 /* Needs a libcall, so it costs about this. */
5796 *total = COSTS_N_INSNS (2);
5800 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5802 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
5810 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5812 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
5815 else if (mode == SImode)
5817 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
5818 /* Slightly disparage register shifts, but not by much. */
5819 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5820 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
5824 /* Needs a libcall. */
5825 *total = COSTS_N_INSNS (2);
5829 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5831 *total = COSTS_N_INSNS (1);
5837 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5838 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5840 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5841 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5842 || subcode1 == ROTATE || subcode1 == ROTATERT
5843 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5844 || subcode1 == ASHIFTRT)
5846 /* It's just the cost of the two operands. */
5851 *total = COSTS_N_INSNS (1);
5855 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5859 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5861 *total = COSTS_N_INSNS (1);
5865 /* A shift as a part of ADD costs nothing. */
5866 if (GET_CODE (XEXP (x, 0)) == MULT
5867 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5869 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
5870 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
5871 *total += rtx_cost (XEXP (x, 1), code, false);
5876 case AND: case XOR: case IOR:
5879 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5881 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5882 || subcode == LSHIFTRT || subcode == ASHIFTRT
5883 || (code == AND && subcode == NOT))
5885 /* It's just the cost of the two operands. */
5891 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5895 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5899 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5901 *total = COSTS_N_INSNS (1);
5907 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5916 if (cc_register (XEXP (x, 0), VOIDmode))
5919 *total = COSTS_N_INSNS (1);
5923 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5924 *total = COSTS_N_INSNS (1);
5926 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5931 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5933 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5934 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5937 *total += COSTS_N_INSNS (1);
5942 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5944 switch (GET_MODE (XEXP (x, 0)))
5947 *total += COSTS_N_INSNS (1);
5951 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5957 *total += COSTS_N_INSNS (2);
5962 *total += COSTS_N_INSNS (1);
5967 if (const_ok_for_arm (INTVAL (x)))
5968 /* A multiplication by a constant requires another instruction
5969 to load the constant to a register. */
5970 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
5972 else if (const_ok_for_arm (~INTVAL (x)))
5973 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5974 else if (const_ok_for_arm (-INTVAL (x)))
5976 if (outer_code == COMPARE || outer_code == PLUS
5977 || outer_code == MINUS)
5980 *total = COSTS_N_INSNS (1);
5983 *total = COSTS_N_INSNS (2);
5989 *total = COSTS_N_INSNS (2);
5993 *total = COSTS_N_INSNS (4);
5998 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
5999 cost of these slightly. */
6000 *total = COSTS_N_INSNS (1) + 1;
6004 if (mode != VOIDmode)
6005 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6007     *total = COSTS_N_INSNS (4); /* Who knows?  */
6012 /* RTX cost dispatcher: use the size costs when optimizing for size,
     otherwise the per-core tuned cost function.  */
6013 static bool
6014 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
6018 return arm_size_rtx_costs (x, (enum rtx_code) code,
6019 (enum rtx_code) outer_code, total);
6021 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
6022 (enum rtx_code) outer_code,
6026 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
6027 supported on any "slowmul" cores, so it can be ignored. */
6030 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6031 int *total, bool speed)
6033 enum machine_mode mode = GET_MODE (x);
6037 *total = thumb1_rtx_costs (x, code, outer_code);
6044 if (GET_MODE_CLASS (mode) == MODE_FLOAT
6047 *total = COSTS_N_INSNS (20);
6051 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6053 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
6054 & (unsigned HOST_WIDE_INT) 0xffffffff);
6055 int cost, const_ok = const_ok_for_arm (i);
6056 int j, booth_unit_size;
6058 /* Tune as appropriate. */
6059 cost = const_ok ? 4 : 8;
6060 booth_unit_size = 2;
6061 	  for (j = 0; i && j < 32; j += booth_unit_size)
6062 	    {
6063 	      i >>= booth_unit_size;
6064 	      cost++;
6065 	    }
6067 	  *total = COSTS_N_INSNS (cost);
6068 *total += rtx_cost (XEXP (x, 0), code, speed);
6072 *total = COSTS_N_INSNS (20);
6076   return arm_rtx_costs_1 (x, outer_code, total, speed);
6077 }
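/* Worked example of the Booth-style loop above (illustrative): costing
   x * 10 (binary 1010), const_ok_for_arm (10) holds, so cost starts at 4;
   the loop consumes the multiplier in 2-bit steps (10 -> 2 -> 0), adding 2,
   and the MULT is costed as COSTS_N_INSNS (6).  */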
6081 /* RTX cost for cores with a fast multiply unit (M variants). */
6084 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6085 int *total, bool speed)
6087 enum machine_mode mode = GET_MODE (x);
6091 *total = thumb1_rtx_costs (x, code, outer_code);
6095 /* ??? should thumb2 use different costs? */
6099 /* There is no point basing this on the tuning, since it is always the
6100 fast variant if it exists at all. */
6102 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6103 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6104 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6106       *total = COSTS_N_INSNS (2);
6113 *total = COSTS_N_INSNS (5);
6117 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6119 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
6120 & (unsigned HOST_WIDE_INT) 0xffffffff);
6121 int cost, const_ok = const_ok_for_arm (i);
6122 int j, booth_unit_size;
6124 /* Tune as appropriate. */
6125 cost = const_ok ? 4 : 8;
6126 booth_unit_size = 8;
6127 	  for (j = 0; i && j < 32; j += booth_unit_size)
6128 	    {
6129 	      i >>= booth_unit_size;
6130 	      cost++;
6131 	    }
6133 	  *total = COSTS_N_INSNS (cost);
6139 *total = COSTS_N_INSNS (4);
6143 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6145 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6147 *total = COSTS_N_INSNS (1);
6152 /* Requires a lib call */
6153 *total = COSTS_N_INSNS (20);
6157 return arm_rtx_costs_1 (x, outer_code, total, speed);
6162 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
6163 so it can be ignored. */
6166 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
6168 enum machine_mode mode = GET_MODE (x);
6172 *total = thumb1_rtx_costs (x, code, outer_code);
6179 if (GET_CODE (XEXP (x, 0)) != MULT)
6180 return arm_rtx_costs_1 (x, outer_code, total, speed);
6182 /* A COMPARE of a MULT is slow on XScale; the muls instruction
6183 will stall until the multiplication is complete. */
6184 *total = COSTS_N_INSNS (3);
6188 /* There is no point basing this on the tuning, since it is always the
6189 fast variant if it exists at all. */
6191 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6192 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6193 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6195 *total = COSTS_N_INSNS (2);
6202 *total = COSTS_N_INSNS (5);
6206 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6208 /* If operand 1 is a constant we can more accurately
6209 calculate the cost of the multiply. The multiplier can
6210 retire 15 bits on the first cycle and a further 12 on the
6211 second. We do, of course, have to load the constant into
6212 a register first. */
6213 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6214 /* There's a general overhead of one cycle. */
6216 unsigned HOST_WIDE_INT masked_const;
6221 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
6223 masked_const = i & 0xffff8000;
6224 if (masked_const != 0)
6227 masked_const = i & 0xf8000000;
6228 if (masked_const != 0)
6231 *total = COSTS_N_INSNS (cost);
6237 *total = COSTS_N_INSNS (3);
6241 /* Requires a lib call */
6242 *total = COSTS_N_INSNS (20);
6246 return arm_rtx_costs_1 (x, outer_code, total, speed);
6251 /* RTX costs for 9e (and later) cores. */
6254 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6255 int *total, bool speed)
6257 enum machine_mode mode = GET_MODE (x);
6264 *total = COSTS_N_INSNS (3);
6268 *total = thumb1_rtx_costs (x, code, outer_code);
6276 /* There is no point basing this on the tuning, since it is always the
6277 fast variant if it exists at all. */
6279 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6280 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6281 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6283 *total = COSTS_N_INSNS (2);
6290 *total = COSTS_N_INSNS (5);
6296 *total = COSTS_N_INSNS (2);
6300 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6302 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6304 *total = COSTS_N_INSNS (1);
6309 *total = COSTS_N_INSNS (20);
6313 return arm_rtx_costs_1 (x, outer_code, total, speed);
6316 /* All address computations that can be done are free, but rtx cost returns
6317 the same for practically all of them. So we weight the different types
6318 of address here in the order (most pref first):
6319 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
6321 arm_arm_address_cost (rtx x)
6323 enum rtx_code c = GET_CODE (x);
6325 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
6327 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
6330 if (c == PLUS || c == MINUS)
6332 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6335 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
6345 arm_thumb_address_cost (rtx x)
6347 enum rtx_code c = GET_CODE (x);
6352 && GET_CODE (XEXP (x, 0)) == REG
6353 && GET_CODE (XEXP (x, 1)) == CONST_INT)
6360 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
6362 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
6366 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
6370 /* Some true dependencies can have a higher cost depending
6371 on precisely how certain input operands are used. */
6373 && REG_NOTE_KIND (link) == 0
6374 && recog_memoized (insn) >= 0
6375 && recog_memoized (dep) >= 0)
6377 int shift_opnum = get_attr_shift (insn);
6378 enum attr_type attr_type = get_attr_type (dep);
6380 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
6381 operand for INSN. If we have a shifted input operand and the
6382 instruction we depend on is another ALU instruction, then we may
6383 have to account for an additional stall. */
6384 if (shift_opnum != 0
6385 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
6387 rtx shifted_operand;
6390 /* Get the shifted operand. */
6391 extract_insn (insn);
6392 shifted_operand = recog_data.operand[shift_opnum];
6394 /* Iterate over all the operands in DEP. If we write an operand
6395 	 that overlaps with SHIFTED_OPERAND, then we have to increase the
6396 cost of this dependency. */
6398 preprocess_constraints ();
6399 for (opno = 0; opno < recog_data.n_operands; opno++)
6401 /* We can ignore strict inputs. */
6402 if (recog_data.operand_type[opno] == OP_IN)
6405 if (reg_overlap_mentioned_p (recog_data.operand[opno],
6412 /* XXX This is not strictly true for the FPA. */
6413 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
6414 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
6417 /* Call insns don't incur a stall, even if they follow a load. */
6418 if (REG_NOTE_KIND (link) == 0
6419 && GET_CODE (insn) == CALL_INSN)
6422 if ((i_pat = single_set (insn)) != NULL
6423 && GET_CODE (SET_SRC (i_pat)) == MEM
6424 && (d_pat = single_set (dep)) != NULL
6425 && GET_CODE (SET_DEST (d_pat)) == MEM)
6427 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
6428 /* This is a load after a store, there is no conflict if the load reads
6429 from a cached area. Assume that loads from the stack, and from the
6430 constant pool are cached, and that others will miss. This is a
6433 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
6434 || reg_mentioned_p (stack_pointer_rtx, src_mem)
6435 || reg_mentioned_p (frame_pointer_rtx, src_mem)
6436 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
6443 static int fp_consts_inited = 0;
6445 /* Only zero is valid for VFP. Other values are also valid for FPA. */
6446 static const char * const strings_fp[8] =
6447 {
6448   "0", "1", "2", "3",
6449   "4", "5", "0.5", "10"
6450 };
6452 static REAL_VALUE_TYPE values_fp[8];
6454 static void
6455 init_fp_table (void)
6456 {
6457   int i;
6458   REAL_VALUE_TYPE r;
6460   if (TARGET_VFP)
6461     fp_consts_inited = 1;
6462   else
6463     fp_consts_inited = 8;
6465   for (i = 0; i < fp_consts_inited; i++)
6466     {
6467       r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
6468       values_fp[i] = r;
6469     }
6470 }
6472 /* Return TRUE if rtx X is a valid immediate FP constant. */
6474 arm_const_double_rtx (rtx x)
6479 if (!fp_consts_inited)
6482 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6483 if (REAL_VALUE_MINUS_ZERO (r))
6486 for (i = 0; i < fp_consts_inited; i++)
6487 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6493 /* Return TRUE if rtx X is a valid immediate FPA constant. */
6495 neg_const_double_rtx_ok_for_fpa (rtx x)
6500 if (!fp_consts_inited)
6503 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6504 r = REAL_VALUE_NEGATE (r);
6505 if (REAL_VALUE_MINUS_ZERO (r))
6508 for (i = 0; i < 8; i++)
6509 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6516 /* VFPv3 has a fairly wide range of representable immediates, formed from
6517 "quarter-precision" floating-point values. These can be evaluated using this
6518    formula (with ^ for exponentiation):

6520      -1^s * n * 2^-r

6522    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
6523 16 <= n <= 31 and 0 <= r <= 7.
6525 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
6527 - A (most-significant) is the sign bit.
6528 - BCD are the exponent (encoded as r XOR 3).
6529 - EFGH are the mantissa (encoded as n - 16).
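
   Worked example (illustrative): 1.0 == -1^0 * 16 * 2^-4, i.e. s == 0,
   n == 16 and r == 4, so ABCDEFGH == 0 : (4 XOR 3 == 111) : (16 - 16 == 0000),
   giving the 8-bit immediate 0x70 used by fconsts/fconstd to load 1.0.  */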
6532 /* Return an integer index for a VFPv3 immediate operand X suitable for the
6533 fconst[sd] instruction, or -1 if X isn't suitable. */
6535 vfp3_const_double_index (rtx x)
6537 REAL_VALUE_TYPE r, m;
6539 unsigned HOST_WIDE_INT mantissa, mant_hi;
6540 unsigned HOST_WIDE_INT mask;
6541 HOST_WIDE_INT m1, m2;
6542 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6544 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
6547 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6549 /* We can't represent these things, so detect them first. */
6550 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
6553 /* Extract sign, exponent and mantissa. */
6554 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6555 r = REAL_VALUE_ABS (r);
6556 exponent = REAL_EXP (&r);
6557 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6558 highest (sign) bit, with a fixed binary point at bit point_pos.
6559 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
6560 bits for the mantissa, this may fail (low bits would be lost). */
6561 real_ldexp (&m, &r, point_pos - exponent);
6562 REAL_VALUE_TO_INT (&m1, &m2, m);
6566 /* If there are bits set in the low part of the mantissa, we can't
6567 represent this value. */
6571 /* Now make it so that mantissa contains the most-significant bits, and move
6572 the point_pos to indicate that the least-significant bits have been
6574 point_pos -= HOST_BITS_PER_WIDE_INT;
6577 /* We can permit four significant bits of mantissa only, plus a high bit
6578 which is always 1. */
6579 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6580 if ((mantissa & mask) != 0)
6583 /* Now we know the mantissa is in range, chop off the unneeded bits. */
6584 mantissa >>= point_pos - 5;
6586 /* The mantissa may be zero. Disallow that case. (It's possible to load the
6587 floating-point immediate zero with Neon using an integer-zero load, but
6588 that case is handled elsewhere.) */
6592 gcc_assert (mantissa >= 16 && mantissa <= 31);
6594 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
6595 normalized significands are in the range [1, 2). (Our mantissa is shifted
6596 left 4 places at this point relative to normalized IEEE754 values). GCC
6597 internally uses [0.5, 1) (see real.c), so the exponent returned from
6598 REAL_EXP must be altered. */
6599 exponent = 5 - exponent;
6601 if (exponent < 0 || exponent > 7)
6604 /* Sign, mantissa and exponent are now in the correct form to plug into the
6605 formula described in the comment above. */
6606 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
6609 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
6611 vfp3_const_double_rtx (rtx x)
6616 return vfp3_const_double_index (x) != -1;
6619 /* Recognize immediates which can be used in various Neon instructions. Legal
6620 immediates are described by the following table (for VMVN variants, the
6621 bitwise inverse of the constant shown is recognized. In either case, VMOV
6622 is output and the correct instruction to use for a given constant is chosen
6623 by the assembler). The constant shown is replicated across all elements of
6624 the destination vector.
6626 insn elems variant constant (binary)
6627 ---- ----- ------- -----------------
6628 vmov i32 0 00000000 00000000 00000000 abcdefgh
6629 vmov i32 1 00000000 00000000 abcdefgh 00000000
6630 vmov i32 2 00000000 abcdefgh 00000000 00000000
6631 vmov i32 3 abcdefgh 00000000 00000000 00000000
6632 vmov i16 4 00000000 abcdefgh
6633 vmov i16 5 abcdefgh 00000000
6634 vmvn i32 6 00000000 00000000 00000000 abcdefgh
6635 vmvn i32 7 00000000 00000000 abcdefgh 00000000
6636 vmvn i32 8 00000000 abcdefgh 00000000 00000000
6637 vmvn i32 9 abcdefgh 00000000 00000000 00000000
6638 vmvn i16 10 00000000 abcdefgh
6639 vmvn i16 11 abcdefgh 00000000
6640 vmov i32 12 00000000 00000000 abcdefgh 11111111
6641 vmvn i32 13 00000000 00000000 abcdefgh 11111111
6642 vmov i32 14 00000000 abcdefgh 11111111 11111111
6643 vmvn i32 15 00000000 abcdefgh 11111111 11111111
6645 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
6646 eeeeeeee ffffffff gggggggg hhhhhhhh
6647 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
6649 For case 18, B = !b. Representable values are exactly those accepted by
6650 vfp3_const_double_index, but are output as floating-point numbers rather
6653 Variants 0-5 (inclusive) may also be used as immediates for the second
6654 operand of VORR/VBIC instructions.
6656 The INVERSE argument causes the bitwise inverse of the given operand to be
6657 recognized instead (used for recognizing legal immediates for the VAND/VORN
6658 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
6659 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
6660 output, rather than the real insns vbic/vorr).
6662 INVERSE makes no difference to the recognition of float vectors.
6664 The return value is the variant of immediate as shown in the above table, or
6665 -1 if the given value doesn't match any of the listed patterns.
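
   For example (illustrative): a V4SImode vector with every element equal
   to 0x0000ab00 matches variant 1 with abcdefgh == 0xab and is emitted as
   vmov.i32 with that immediate; a vector of 0xffffff54 elements matches
   the VMVN form of variant 6, since its bitwise inverse is 0x000000ab.  */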
6668 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6669 rtx *modconst, int *elementwidth)
6671 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
6673 for (i = 0; i < idx; i += (STRIDE)) \
6678 immtype = (CLASS); \
6679 elsize = (ELSIZE); \
6683 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6684 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6685 unsigned char bytes[16];
6686 int immtype = -1, matches;
6687 unsigned int invmask = inverse ? 0xff : 0;
6689 /* Vectors of float constants. */
6690 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6692 rtx el0 = CONST_VECTOR_ELT (op, 0);
6695 if (!vfp3_const_double_rtx (el0))
6698 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
6700 for (i = 1; i < n_elts; i++)
6702 rtx elt = CONST_VECTOR_ELT (op, i);
6705 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
6707 if (!REAL_VALUES_EQUAL (r0, re))
6712 *modconst = CONST_VECTOR_ELT (op, 0);
6720 /* Splat vector constant out into a byte vector. */
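  /* For instance (added note): a V4HI constant with every element 0x00ab
     has innersize 2, so each element contributes the little-endian bytes
     0xab, 0x00, and the final array (with INVERSE clear) is
     ab 00 ab 00 ab 00 ab 00. */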
6721 for (i = 0; i < n_elts; i++)
6723 rtx el = CONST_VECTOR_ELT (op, i);
6724 unsigned HOST_WIDE_INT elpart;
6725 unsigned int part, parts;
6727 if (GET_CODE (el) == CONST_INT)
6729 elpart = INTVAL (el);
6732 else if (GET_CODE (el) == CONST_DOUBLE)
6734 elpart = CONST_DOUBLE_LOW (el);
6740 for (part = 0; part < parts; part++)
6743 for (byte = 0; byte < innersize; byte++)
6745 bytes[idx++] = (elpart & 0xff) ^ invmask;
6746 elpart >>= BITS_PER_UNIT;
6748 if (GET_CODE (el) == CONST_DOUBLE)
6749 elpart = CONST_DOUBLE_HIGH (el);
6754 gcc_assert (idx == GET_MODE_SIZE (mode));
6758 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6759 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6761 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6762 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6764 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6765 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6767 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6768 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6770 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6772 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6774 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6775 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6777 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6778 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6780 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6781 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6783 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6784 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6786 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6788 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6790 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6791 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6793 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6794 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6796 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6797 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6799 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6800 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6802 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6804 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6805 && bytes[i] == bytes[(i + 8) % idx]);
6813 *elementwidth = elsize;
6817 unsigned HOST_WIDE_INT imm = 0;
6819 /* Un-invert bytes of recognized vector, if necessary. */
6821 for (i = 0; i < idx; i++)
6822 bytes[i] ^= invmask;
6826 /* FIXME: Broken on 32-bit H_W_I hosts. */
6827 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6829 for (i = 0; i < 8; i++)
6830 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6831 << (i * BITS_PER_UNIT);
6833 *modconst = GEN_INT (imm);
6837 unsigned HOST_WIDE_INT imm = 0;
6839 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6840 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6842 *modconst = GEN_INT (imm);
6850 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6851 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6852 float elements), and a modified constant (whatever should be output for a
6853 VMOV) in *MODCONST. */
6856 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6857 rtx *modconst, int *elementwidth)
6861 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6867 *modconst = tmpconst;
6870 *elementwidth = tmpwidth;
6875 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6876 the immediate is valid, write a constant suitable for using as an operand
6877 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6878 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6881 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6882 rtx *modconst, int *elementwidth)
6886 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6888 if (retval < 0 || retval > 5)
6892 *modconst = tmpconst;
6895 *elementwidth = tmpwidth;
6900 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
6904 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6905 int inverse, int quad)
6907 int width, is_valid;
6908 static char templ[40];
6910 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6912 gcc_assert (is_valid != 0);
6915 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6917 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
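  /* Illustration (added): with MNEM "vand", a 32-bit element width and
     QUAD nonzero, the template built above is "vand.i32\t%q0, %2"; the
     operand-printing machinery later substitutes the destination register
     and the modified constant for %q0 and %2. */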
6922 /* Output a sequence of pairwise operations to implement a reduction.
6923 NOTE: We do "too much work" here, because pairwise operations work on two
6924 registers-worth of operands in one go. Unfortunately I don't think we can
6925 exploit those extra calculations to do the full operation in fewer steps.
6926 Although all vector elements of the result but the first are ignored, we
6927 actually calculate the same result in each of the elements. An alternative
6928 such as initially loading a vector with zero to use as each of the second
6929 operands would use up an additional register and take an extra instruction,
6930 for no particular gain. */
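/* Sketch (added) of the shape of the output for a V4SI reduction: PARTS
   is 4, so the loop below runs for i = 2 and then i = 1, emitting two
   pairwise operations -- the first into a fresh scratch register, the
   second into OP0 -- after which element 0 of OP0 holds the reduction of
   all four input elements. */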
6933 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6934 rtx (*reduc) (rtx, rtx, rtx))
6936 enum machine_mode inner = GET_MODE_INNER (mode);
6937 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6940 for (i = parts / 2; i >= 1; i /= 2)
6942 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6943 emit_insn (reduc (dest, tmpsum, tmpsum));
6948 /* Initialize a vector with non-constant elements. FIXME: We can do better
6949 than the current implementation (building a vector on the stack and then
6950 loading it) in many cases. See rs6000.c. */
6953 neon_expand_vector_init (rtx target, rtx vals)
6955 enum machine_mode mode = GET_MODE (target);
6956 enum machine_mode inner = GET_MODE_INNER (mode);
6957 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6960 gcc_assert (VECTOR_MODE_P (mode));
6962 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6963 for (i = 0; i < n_elts; i++)
6964 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6965 XVECEXP (vals, 0, i));
6967 emit_move_insn (target, mem);
6970 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6971 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6972 reported source locations are bogus. */
6975 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6980 gcc_assert (GET_CODE (operand) == CONST_INT);
6982 lane = INTVAL (operand);
6984 if (lane < low || lane >= high)
6988 /* Bounds-check lanes. */
6991 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6993 bounds_check (operand, low, high, "lane out of range");
6996 /* Bounds-check constants. */
6999 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7001 bounds_check (operand, low, high, "constant out of range");
7005 neon_element_bits (enum machine_mode mode)
7008 return GET_MODE_BITSIZE (mode);
7010 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
7014 /* Predicates for `match_operand' and `match_operator'. */
7016 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
7018 cirrus_memory_offset (rtx op)
7020 /* Reject eliminable registers. */
7021 if (! (reload_in_progress || reload_completed)
7022 && ( reg_mentioned_p (frame_pointer_rtx, op)
7023 || reg_mentioned_p (arg_pointer_rtx, op)
7024 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7025 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7026 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7027 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7030 if (GET_CODE (op) == MEM)
7036 /* Match: (mem (reg)). */
7037 if (GET_CODE (ind) == REG)
7043 if (GET_CODE (ind) == PLUS
7044 && GET_CODE (XEXP (ind, 0)) == REG
7045 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7046 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
7053 /* Return TRUE if OP is a valid coprocessor memory address pattern.
7054 WB is true if full writeback address modes are allowed and is false
7055 if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */
7059 arm_coproc_mem_operand (rtx op, bool wb)
7063 /* Reject eliminable registers. */
7064 if (! (reload_in_progress || reload_completed)
7065 && ( reg_mentioned_p (frame_pointer_rtx, op)
7066 || reg_mentioned_p (arg_pointer_rtx, op)
7067 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7068 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7069 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7070 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7073 /* Constants are converted into offsets from labels. */
7074 if (GET_CODE (op) != MEM)
7079 if (reload_completed
7080 && (GET_CODE (ind) == LABEL_REF
7081 || (GET_CODE (ind) == CONST
7082 && GET_CODE (XEXP (ind, 0)) == PLUS
7083 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7084 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7087 /* Match: (mem (reg)). */
7088 if (GET_CODE (ind) == REG)
7089 return arm_address_register_rtx_p (ind, 0);
7091 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
7092 acceptable in any case (subject to verification by
7093 arm_address_register_rtx_p). We need WB to be true to accept
7094 PRE_INC and POST_DEC. */
7095 if (GET_CODE (ind) == POST_INC
7096 || GET_CODE (ind) == PRE_DEC
7098 && (GET_CODE (ind) == PRE_INC
7099 || GET_CODE (ind) == POST_DEC)))
7100 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7103 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
7104 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
7105 && GET_CODE (XEXP (ind, 1)) == PLUS
7106 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
7107 ind = XEXP (ind, 1);
7112 if (GET_CODE (ind) == PLUS
7113 && GET_CODE (XEXP (ind, 0)) == REG
7114 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7115 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7116 && INTVAL (XEXP (ind, 1)) > -1024
7117 && INTVAL (XEXP (ind, 1)) < 1024
7118 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7124 /* Return TRUE if OP is a memory operand which we can load or store a vector
7125 to/from. TYPE is one of the following values:
7126 0 - Vector load/store (vldr)
7127 1 - Core registers (ldm)
7128 2 - Element/structure loads (vld1)
7131 neon_vector_mem_operand (rtx op, int type)
7135 /* Reject eliminable registers. */
7136 if (! (reload_in_progress || reload_completed)
7137 && ( reg_mentioned_p (frame_pointer_rtx, op)
7138 || reg_mentioned_p (arg_pointer_rtx, op)
7139 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7140 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7141 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7142 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7145 /* Constants are converted into offsets from labels. */
7146 if (GET_CODE (op) != MEM)
7151 if (reload_completed
7152 && (GET_CODE (ind) == LABEL_REF
7153 || (GET_CODE (ind) == CONST
7154 && GET_CODE (XEXP (ind, 0)) == PLUS
7155 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7156 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7159 /* Match: (mem (reg)). */
7160 if (GET_CODE (ind) == REG)
7161 return arm_address_register_rtx_p (ind, 0);
7163 /* Allow post-increment with Neon registers. */
7164 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
7165 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7167 /* FIXME: vld1 allows register post-modify. */
7173 && GET_CODE (ind) == PLUS
7174 && GET_CODE (XEXP (ind, 0)) == REG
7175 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7176 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7177 && INTVAL (XEXP (ind, 1)) > -1024
7178 && INTVAL (XEXP (ind, 1)) < 1016
7179 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7185 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
7188 neon_struct_mem_operand (rtx op)
7192 /* Reject eliminable registers. */
7193 if (! (reload_in_progress || reload_completed)
7194 && ( reg_mentioned_p (frame_pointer_rtx, op)
7195 || reg_mentioned_p (arg_pointer_rtx, op)
7196 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7197 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7198 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7199 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7202 /* Constants are converted into offsets from labels. */
7203 if (GET_CODE (op) != MEM)
7208 if (reload_completed
7209 && (GET_CODE (ind) == LABEL_REF
7210 || (GET_CODE (ind) == CONST
7211 && GET_CODE (XEXP (ind, 0)) == PLUS
7212 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7213 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7216 /* Match: (mem (reg)). */
7217 if (GET_CODE (ind) == REG)
7218 return arm_address_register_rtx_p (ind, 0);
7223 /* Return true if X is a register that will be eliminated later on. */
7225 arm_eliminable_register (rtx x)
7227 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
7228 || REGNO (x) == ARG_POINTER_REGNUM
7229 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
7230 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
7233 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
7234 coprocessor registers. Otherwise return NO_REGS. */
7237 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
7241 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
7243 return GENERAL_REGS;
7247 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7248 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7249 && neon_vector_mem_operand (x, 0))
7252 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
7255 return GENERAL_REGS;
7258 /* Values which must be returned in the most-significant end of the return register. */
7262 arm_return_in_msb (const_tree valtype)
7264 return (TARGET_AAPCS_BASED
7266 && (AGGREGATE_TYPE_P (valtype)
7267 || TREE_CODE (valtype) == COMPLEX_TYPE));
7270 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
7271 Used by the Cirrus Maverick code, which has to work around
7272 a hardware bug triggered by such instructions. */
7274 arm_memory_load_p (rtx insn)
7276 rtx body, lhs, rhs;
7278 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
7281 body = PATTERN (insn);
7283 if (GET_CODE (body) != SET)
7286 lhs = XEXP (body, 0);
7287 rhs = XEXP (body, 1);
7289 lhs = REG_OR_SUBREG_RTX (lhs);
7291 /* If the destination is not a general purpose
7292 register we do not have to worry. */
7293 if (GET_CODE (lhs) != REG
7294 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
7297 /* As well as loads from memory we also have to react
7298 to loads of invalid constants which will be turned
7299 into loads from the minipool. */
7300 return (GET_CODE (rhs) == MEM
7301 || GET_CODE (rhs) == SYMBOL_REF
7302 || note_invalid_constants (insn, -1, false));
7305 /* Return TRUE if INSN is a Cirrus instruction. */
7307 arm_cirrus_insn_p (rtx insn)
7309 enum attr_cirrus attr;
7311 /* get_attr cannot accept USE or CLOBBER. */
7313 || GET_CODE (insn) != INSN
7314 || GET_CODE (PATTERN (insn)) == USE
7315 || GET_CODE (PATTERN (insn)) == CLOBBER)
7318 attr = get_attr_cirrus (insn);
7320 return attr != CIRRUS_NOT;
7323 /* Cirrus reorg for invalid instruction combinations. */
7325 cirrus_reorg (rtx first)
7327 enum attr_cirrus attr;
7328 rtx body = PATTERN (first);
7332 /* Any branch must be followed by 2 non-Cirrus instructions. */
7333 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
7336 t = next_nonnote_insn (first);
7338 if (arm_cirrus_insn_p (t))
7341 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7345 emit_insn_after (gen_nop (), first);
7350 /* (float (blah)) is in parallel with a clobber. */
7351 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
7352 body = XVECEXP (body, 0, 0);
7354 if (GET_CODE (body) == SET)
7356 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
7358 /* cfldrd, cfldr64, cfstrd, cfstr64 must
7359 be followed by a non-Cirrus insn. */
7360 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
7362 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
7363 emit_insn_after (gen_nop (), first);
7367 else if (arm_memory_load_p (first))
7369 unsigned int arm_regno;
7371 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
7372 ldr/cfmv64hr combination where the Rd field is the same
7373 in both instructions must be split with a non-Cirrus insn.
7380 /* Get Arm register number for ldr insn. */
7381 if (GET_CODE (lhs) == REG)
7382 arm_regno = REGNO (lhs);
7385 gcc_assert (GET_CODE (rhs) == REG);
7386 arm_regno = REGNO (rhs);
7390 first = next_nonnote_insn (first);
7392 if (! arm_cirrus_insn_p (first))
7395 body = PATTERN (first);
7397 /* (float (blah)) is in parallel with a clobber. */
7398 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
7399 body = XVECEXP (body, 0, 0);
7401 if (GET_CODE (body) == FLOAT)
7402 body = XEXP (body, 0);
7404 if (get_attr_cirrus (first) == CIRRUS_MOVE
7405 && GET_CODE (XEXP (body, 1)) == REG
7406 && arm_regno == REGNO (XEXP (body, 1)))
7407 emit_insn_after (gen_nop (), first);
7413 /* get_attr cannot accept USE or CLOBBER. */
7415 || GET_CODE (first) != INSN
7416 || GET_CODE (PATTERN (first)) == USE
7417 || GET_CODE (PATTERN (first)) == CLOBBER)
7420 attr = get_attr_cirrus (first);
7422 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
7423 must be followed by a non-coprocessor instruction. */
7424 if (attr == CIRRUS_COMPARE)
7428 t = next_nonnote_insn (first);
7430 if (arm_cirrus_insn_p (t))
7433 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7437 emit_insn_after (gen_nop (), first);
7443 /* Return TRUE if X references a SYMBOL_REF. */
7445 symbol_mentioned_p (rtx x)
7450 if (GET_CODE (x) == SYMBOL_REF)
7453 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
7454 are constant offsets, not symbols. */
7455 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7458 fmt = GET_RTX_FORMAT (GET_CODE (x));
7460 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7466 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7467 if (symbol_mentioned_p (XVECEXP (x, i, j)))
7470 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
7477 /* Return TRUE if X references a LABEL_REF. */
7479 label_mentioned_p (rtx x)
7484 if (GET_CODE (x) == LABEL_REF)
7487 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
7488 instruction, but they are constant offsets, not symbols. */
7489 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7492 fmt = GET_RTX_FORMAT (GET_CODE (x));
7493 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7499 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7500 if (label_mentioned_p (XVECEXP (x, i, j)))
7503 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
7511 tls_mentioned_p (rtx x)
7513 switch (GET_CODE (x))
7516 return tls_mentioned_p (XEXP (x, 0));
7519 if (XINT (x, 1) == UNSPEC_TLS)
7527 /* Must not copy a SET whose source operand is PC-relative. */
7530 arm_cannot_copy_insn_p (rtx insn)
7532 rtx pat = PATTERN (insn);
7534 if (GET_CODE (pat) == SET)
7536 rtx rhs = SET_SRC (pat);
7538 if (GET_CODE (rhs) == UNSPEC
7539 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
7542 if (GET_CODE (rhs) == MEM
7543 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
7544 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
7554 enum rtx_code code = GET_CODE (x);
7571 /* Return 1 if memory locations are adjacent. */
7573 adjacent_mem_locations (rtx a, rtx b)
7575 /* We don't guarantee to preserve the order of these memory refs. */
7576 if (volatile_refs_p (a) || volatile_refs_p (b))
7579 if ((GET_CODE (XEXP (a, 0)) == REG
7580 || (GET_CODE (XEXP (a, 0)) == PLUS
7581 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
7582 && (GET_CODE (XEXP (b, 0)) == REG
7583 || (GET_CODE (XEXP (b, 0)) == PLUS
7584 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
7586 HOST_WIDE_INT val0 = 0, val1 = 0;
7590 if (GET_CODE (XEXP (a, 0)) == PLUS)
7592 reg0 = XEXP (XEXP (a, 0), 0);
7593 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
7598 if (GET_CODE (XEXP (b, 0)) == PLUS)
7600 reg1 = XEXP (XEXP (b, 0), 0);
7601 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
7606 /* Don't accept any offset that will require multiple
7607 instructions to handle, since this would cause the
7608 arith_adjacentmem pattern to output an overlong sequence. */
7609 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
7612 /* Don't allow an eliminable register: register elimination can make
7613 the offset too large. */
7614 if (arm_eliminable_register (reg0))
7617 val_diff = val1 - val0;
7621 /* If the target has load delay slots, then there's no benefit
7622 to using an ldm instruction unless the offset is zero and
7623 we are optimizing for size. */
7624 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
7625 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
7626 && (val_diff == 4 || val_diff == -4));
7629 return ((REGNO (reg0) == REGNO (reg1))
7630 && (val_diff == 4 || val_diff == -4));
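/* Example (added): MEMs addressed as [r3, #8] and [r3, #12] share a base
   register and their offsets differ by 4, so they count as adjacent;
   [r3] and [r3, #8] do not. */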
7637 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7638 HOST_WIDE_INT *load_offset)
7640 int unsorted_regs[4];
7641 HOST_WIDE_INT unsorted_offsets[4];
7646 /* Can only handle 2, 3, or 4 insns at present,
7647 though could be easily extended if required. */
7648 gcc_assert (nops >= 2 && nops <= 4);
7650 memset (order, 0, 4 * sizeof (int));
7652 /* Loop over the operands and check that the memory references are
7653 suitable (i.e. immediate offsets from the same base register). At
7654 the same time, extract the target register, and the memory offsets. */
7656 for (i = 0; i < nops; i++)
7661 /* Convert a subreg of a mem into the mem itself. */
7662 if (GET_CODE (operands[nops + i]) == SUBREG)
7663 operands[nops + i] = alter_subreg (operands + (nops + i));
7665 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7667 /* Don't reorder volatile memory references; it doesn't seem worth
7668 looking for the case where the order is ok anyway. */
7669 if (MEM_VOLATILE_P (operands[nops + i]))
7672 offset = const0_rtx;
7674 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7675 || (GET_CODE (reg) == SUBREG
7676 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7677 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7678 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7680 || (GET_CODE (reg) == SUBREG
7681 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7682 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7687 base_reg = REGNO (reg);
7688 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7689 ? REGNO (operands[i])
7690 : REGNO (SUBREG_REG (operands[i])));
7695 if (base_reg != (int) REGNO (reg))
7696 /* Not addressed from the same base register. */
7699 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7700 ? REGNO (operands[i])
7701 : REGNO (SUBREG_REG (operands[i])));
7702 if (unsorted_regs[i] < unsorted_regs[order[0]])
7706 /* If it isn't an integer register, or if it overwrites the
7707 base register but isn't the last insn in the list, then
7708 we can't do this. */
7709 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
7710 || (i != nops - 1 && unsorted_regs[i] == base_reg))
7713 unsorted_offsets[i] = INTVAL (offset);
7716 /* Not a suitable memory address. */
7720 /* All the useful information has now been extracted from the
7721 operands into unsorted_regs and unsorted_offsets; additionally,
7722 order[0] has been set to the lowest numbered register in the
7723 list. Sort the registers into order, and check that the memory
7724 offsets are ascending and adjacent. */
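  /* Worked example (added): for the pair
	ldr r3, [r5, #4]
	ldr r1, [r5, #0]
     unsorted_regs is {3, 1} with offsets {4, 0}. Sorting by register
     number gives the order r1, r3 with ascending, adjacent offsets 0, 4,
     and since the lowest offset is zero the function returns 1 (ldmia). */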
7726 for (i = 1; i < nops; i++)
7730 order[i] = order[i - 1];
7731 for (j = 0; j < nops; j++)
7732 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7733 && (order[i] == order[i - 1]
7734 || unsorted_regs[j] < unsorted_regs[order[i]]))
7737 /* Have we found a suitable register? If not, one must be used more than once. */
7739 if (order[i] == order[i - 1])
7742 /* Is the memory address adjacent and ascending? */
7743 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7751 for (i = 0; i < nops; i++)
7752 regs[i] = unsorted_regs[order[i]];
7754 *load_offset = unsorted_offsets[order[0]];
7757 if (unsorted_offsets[order[0]] == 0)
7758 return 1; /* ldmia */
7760 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7761 return 2; /* ldmib */
7763 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7764 return 3; /* ldmda */
7766 if (unsorted_offsets[order[nops - 1]] == -4)
7767 return 4; /* ldmdb */
7769 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7770 if the offset isn't small enough. The reason 2 ldrs are faster
7771 is because these ARMs are able to do more than one cache access
7772 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7773 whilst the ARM8 has a double bandwidth cache. This means that
7774 these cores can do both an instruction fetch and a data fetch in
7775 a single cycle, so the trick of calculating the address into a
7776 scratch register (one of the result regs) and then doing a load
7777 multiple actually becomes slower (and no smaller in code size).
7778 That is the transformation
7780 ldr rd1, [rbase + offset]
7781 ldr rd2, [rbase + offset + 4]
7785 add rd1, rbase, offset
7786 ldmia rd1, {rd1, rd2}
7788 produces worse code -- '3 cycles + any stalls on rd2' instead of
7789 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7790 access per cycle, the first sequence could never complete in less
7791 than 6 cycles, whereas the ldm sequence would only take 5 and
7792 would make better use of sequential accesses if not hitting the cache.
7795 We cheat here and test 'arm_ld_sched' which we currently know to
7796 only be true for the ARM8, ARM9 and StrongARM. If this ever
7797 changes, then the test below needs to be reworked. */
7798 if (nops == 2 && arm_ld_sched)
7801 /* Can't do it without setting up the offset, only do this if it takes
7802 no more than one insn. */
7803 return (const_ok_for_arm (unsorted_offsets[order[0]])
7804 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7808 emit_ldm_seq (rtx *operands, int nops)
7812 HOST_WIDE_INT offset;
7816 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7819 strcpy (buf, "ldm%(ia%)\t");
7823 strcpy (buf, "ldm%(ib%)\t");
7827 strcpy (buf, "ldm%(da%)\t");
7831 strcpy (buf, "ldm%(db%)\t");
7836 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7837 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7840 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7841 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7843 output_asm_insn (buf, operands);
7845 strcpy (buf, "ldm%(ia%)\t");
7852 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7853 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7855 for (i = 1; i < nops; i++)
7856 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7857 reg_names[regs[i]]);
7859 strcat (buf, "}\t%@ phole ldm");
7861 output_asm_insn (buf, operands);
7866 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7867 HOST_WIDE_INT * load_offset)
7869 int unsorted_regs[4];
7870 HOST_WIDE_INT unsorted_offsets[4];
7875 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7876 extended if required. */
7877 gcc_assert (nops >= 2 && nops <= 4);
7879 memset (order, 0, 4 * sizeof (int));
7881 /* Loop over the operands and check that the memory references are
7882 suitable (i.e. immediate offsets from the same base register). At
7883 the same time, extract the target register, and the memory offsets. */
7885 for (i = 0; i < nops; i++)
7890 /* Convert a subreg of a mem into the mem itself. */
7891 if (GET_CODE (operands[nops + i]) == SUBREG)
7892 operands[nops + i] = alter_subreg (operands + (nops + i));
7894 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7896 /* Don't reorder volatile memory references; it doesn't seem worth
7897 looking for the case where the order is ok anyway. */
7898 if (MEM_VOLATILE_P (operands[nops + i]))
7901 offset = const0_rtx;
7903 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7904 || (GET_CODE (reg) == SUBREG
7905 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7906 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7907 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7909 || (GET_CODE (reg) == SUBREG
7910 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7911 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7916 base_reg = REGNO (reg);
7917 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7918 ? REGNO (operands[i])
7919 : REGNO (SUBREG_REG (operands[i])));
7924 if (base_reg != (int) REGNO (reg))
7925 /* Not addressed from the same base register. */
7928 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7929 ? REGNO (operands[i])
7930 : REGNO (SUBREG_REG (operands[i])));
7931 if (unsorted_regs[i] < unsorted_regs[order[0]])
7935 /* If it isn't an integer register, then we can't do this. */
7936 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7939 unsorted_offsets[i] = INTVAL (offset);
7942 /* Not a suitable memory address. */
7946 /* All the useful information has now been extracted from the
7947 operands into unsorted_regs and unsorted_offsets; additionally,
7948 order[0] has been set to the lowest numbered register in the
7949 list. Sort the registers into order, and check that the memory
7950 offsets are ascending and adjacent. */
7952 for (i = 1; i < nops; i++)
7956 order[i] = order[i - 1];
7957 for (j = 0; j < nops; j++)
7958 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7959 && (order[i] == order[i - 1]
7960 || unsorted_regs[j] < unsorted_regs[order[i]]))
7963 /* Have we found a suitable register? If not, one must be used more than once. */
7965 if (order[i] == order[i - 1])
7968 /* Is the memory address adjacent and ascending? */
7969 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7977 for (i = 0; i < nops; i++)
7978 regs[i] = unsorted_regs[order[i]];
7980 *load_offset = unsorted_offsets[order[0]];
7983 if (unsorted_offsets[order[0]] == 0)
7984 return 1; /* stmia */
7986 if (unsorted_offsets[order[0]] == 4)
7987 return 2; /* stmib */
7989 if (unsorted_offsets[order[nops - 1]] == 0)
7990 return 3; /* stmda */
7992 if (unsorted_offsets[order[nops - 1]] == -4)
7993 return 4; /* stmdb */
7999 emit_stm_seq (rtx *operands, int nops)
8003 HOST_WIDE_INT offset;
8007 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
8010 strcpy (buf, "stm%(ia%)\t");
8014 strcpy (buf, "stm%(ib%)\t");
8018 strcpy (buf, "stm%(da%)\t");
8022 strcpy (buf, "stm%(db%)\t");
8029 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
8030 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
8032 for (i = 1; i < nops; i++)
8033 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
8034 reg_names[regs[i]]);
8036 strcat (buf, "}\t%@ phole stm");
8038 output_asm_insn (buf, operands);
8042 /* Routines for use in generating RTL. */
8045 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
8046 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
8048 HOST_WIDE_INT offset = *offsetp;
8051 int sign = up ? 1 : -1;
8054 /* XScale has load-store double instructions, but they have stricter
8055 alignment requirements than load-store multiple, so we cannot use them.
8058 For XScale ldm requires 2 + NREGS cycles to complete and blocks
8059 the pipeline until completion.
8067 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
8076 Best case ldr will always win. However, the more ldr instructions
8077 we issue, the less likely we are to be able to schedule them well.
8078 Using ldr instructions also increases code size.
8080 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
8081 for counts of 3 or 4 regs. */
8082 if (arm_tune_xscale && count <= 2 && ! optimize_size)
8088 for (i = 0; i < count; i++)
8090 addr = plus_constant (from, i * 4 * sign);
8091 mem = adjust_automodify_address (basemem, SImode, addr, offset);
8092 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
8098 emit_move_insn (from, plus_constant (from, count * 4 * sign));
8108 result = gen_rtx_PARALLEL (VOIDmode,
8109 rtvec_alloc (count + (write_back ? 1 : 0)));
8112 XVECEXP (result, 0, 0)
8113 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
8118 for (j = 0; i < count; i++, j++)
8120 addr = plus_constant (from, j * 4 * sign);
8121 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8122 XVECEXP (result, 0, i)
8123 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
8134 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
8135 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
8137 HOST_WIDE_INT offset = *offsetp;
8140 int sign = up ? 1 : -1;
8143 /* See arm_gen_load_multiple for discussion of
8144 the pros/cons of ldm/stm usage for XScale. */
8145 if (arm_tune_xscale && count <= 2 && ! optimize_size)
8151 for (i = 0; i < count; i++)
8153 addr = plus_constant (to, i * 4 * sign);
8154 mem = adjust_automodify_address (basemem, SImode, addr, offset);
8155 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
8161 emit_move_insn (to, plus_constant (to, count * 4 * sign));
8171 result = gen_rtx_PARALLEL (VOIDmode,
8172 rtvec_alloc (count + (write_back ? 1 : 0)));
8175 XVECEXP (result, 0, 0)
8176 = gen_rtx_SET (VOIDmode, to,
8177 plus_constant (to, count * 4 * sign));
8182 for (j = 0; i < count; i++, j++)
8184 addr = plus_constant (to, j * 4 * sign);
8185 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8186 XVECEXP (result, 0, i)
8187 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
8198 arm_gen_movmemqi (rtx *operands)
8200 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
8201 HOST_WIDE_INT srcoffset, dstoffset;
8203 rtx src, dst, srcbase, dstbase;
8204 rtx part_bytes_reg = NULL;
8207 if (GET_CODE (operands[2]) != CONST_INT
8208 || GET_CODE (operands[3]) != CONST_INT
8209 || INTVAL (operands[2]) > 64
8210 || INTVAL (operands[3]) & 3)
8213 dstbase = operands[0];
8214 srcbase = operands[1];
8216 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
8217 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
8219 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
8220 out_words_to_go = INTVAL (operands[2]) / 4;
8221 last_bytes = INTVAL (operands[2]) & 3;
8222 dstoffset = srcoffset = 0;
8224 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
8225 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
8227 for (i = 0; in_words_to_go >= 2; i+=4)
8229 if (in_words_to_go > 4)
8230 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
8231 srcbase, &srcoffset));
8233 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
8234 FALSE, srcbase, &srcoffset));
8236 if (out_words_to_go)
8238 if (out_words_to_go > 4)
8239 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
8240 dstbase, &dstoffset));
8241 else if (out_words_to_go != 1)
8242 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
8246 dstbase, &dstoffset));
8249 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8250 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
8251 if (last_bytes != 0)
8253 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
8259 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
8260 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
8263 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
8264 if (out_words_to_go)
8268 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8269 sreg = copy_to_reg (mem);
8271 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8272 emit_move_insn (mem, sreg);
8275 gcc_assert (!in_words_to_go); /* Sanity check */
8280 gcc_assert (in_words_to_go > 0);
8282 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8283 part_bytes_reg = copy_to_mode_reg (SImode, mem);
8286 gcc_assert (!last_bytes || part_bytes_reg);
8288 if (BYTES_BIG_ENDIAN && last_bytes)
8290 rtx tmp = gen_reg_rtx (SImode);
8292 /* The bytes we want are in the top end of the word. */
8293 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
8294 GEN_INT (8 * (4 - last_bytes))));
8295 part_bytes_reg = tmp;
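      /* Illustration (added): with last_bytes == 1 the wanted byte sits in
	 bits 31..24 of PART_BYTES_REG on a big-endian target, so the shift
	 above is by 8 * (4 - 1) = 24 bits, moving it into the low byte. */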
8299 mem = adjust_automodify_address (dstbase, QImode,
8300 plus_constant (dst, last_bytes - 1),
8301 dstoffset + last_bytes - 1);
8302 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8306 tmp = gen_reg_rtx (SImode);
8307 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
8308 part_bytes_reg = tmp;
8317 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
8318 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
8322 rtx tmp = gen_reg_rtx (SImode);
8323 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
8324 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
8325 part_bytes_reg = tmp;
8332 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
8333 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8340 /* Select a dominance comparison mode if possible for a test of the general
8341 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
8342 COND_OR == DOM_CC_X_AND_Y => (X && Y)
8343 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
8344 COND_OR == DOM_CC_X_OR_Y => (X || Y)
8345 In all cases OP will be either EQ or NE, but we don't need to know which
8346 here. If we are unable to support a dominance comparison we return
8347 CC mode. This will then fail to match for the RTL expressions that
8348 generate this call. */
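/* Example (added): for (ne (and (eq a b) (eq c d)) (const_int 0)) both
   conditions are EQ and the combination is DOM_CC_X_AND_Y, so the function
   returns CC_DEQmode; the test can then be emitted as a compare followed
   by a conditional compare. */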
8350 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
8352 enum rtx_code cond1, cond2;
8355 /* Currently we will probably get the wrong result if the individual
8356 comparisons are not simple. This also ensures that it is safe to
8357 reverse a comparison if necessary. */
8358 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
8360 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
8364 /* The if_then_else variant of this tests the second condition if the
8365 first passes, but is true if the first fails. Reverse the first
8366 condition to get a true "inclusive-or" expression. */
8367 if (cond_or == DOM_CC_NX_OR_Y)
8368 cond1 = reverse_condition (cond1);
8370 /* If the comparisons are not equal, and one doesn't dominate the other,
8371 then we can't do this. */
8373 && !comparison_dominates_p (cond1, cond2)
8374 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
8379 enum rtx_code temp = cond1;
8387 if (cond_or == DOM_CC_X_AND_Y)
8392 case EQ: return CC_DEQmode;
8393 case LE: return CC_DLEmode;
8394 case LEU: return CC_DLEUmode;
8395 case GE: return CC_DGEmode;
8396 case GEU: return CC_DGEUmode;
8397 default: gcc_unreachable ();
8401 if (cond_or == DOM_CC_X_AND_Y)
8417 if (cond_or == DOM_CC_X_AND_Y)
8433 if (cond_or == DOM_CC_X_AND_Y)
8449 if (cond_or == DOM_CC_X_AND_Y)
8464 /* The remaining cases only occur when both comparisons are the same. */
8467 gcc_assert (cond1 == cond2);
8471 gcc_assert (cond1 == cond2);
8475 gcc_assert (cond1 == cond2);
8479 gcc_assert (cond1 == cond2);
8483 gcc_assert (cond1 == cond2);
8492 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
8494 /* All floating point compares return CCFP if it is an equality
8495 comparison, and CCFPE otherwise. */
8496 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
8516 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
8525 /* A compare with a shifted operand. Because of canonicalization, the
8526 comparison will have to be swapped when we emit the assembler. */
8527 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
8528 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8529 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
8530 || GET_CODE (x) == ROTATERT))
8533 /* This operation is performed swapped, but since we only rely on the Z
8534 flag we don't need an additional mode. */
8535 if (GET_MODE (y) == SImode && REG_P (y)
8536 && GET_CODE (x) == NEG
8537 && (op == EQ || op == NE))
8540 /* This is a special case that is used by combine to allow a
8541 comparison of a shifted byte load to be split into a zero-extend
8542 followed by a comparison of the shifted integer (only valid for
8543 equalities and unsigned inequalities). */
8544 if (GET_MODE (x) == SImode
8545 && GET_CODE (x) == ASHIFT
8546 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
8547 && GET_CODE (XEXP (x, 0)) == SUBREG
8548 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
8549 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
8550 && (op == EQ || op == NE
8551 || op == GEU || op == GTU || op == LTU || op == LEU)
8552 && GET_CODE (y) == CONST_INT)
8555 /* A construct for a conditional compare: if the false arm contains
8556 0, then both conditions must be true; otherwise either condition
8557 must be true. Not all conditions are possible, so CCmode is
8558 returned if it can't be done. */
8559 if (GET_CODE (x) == IF_THEN_ELSE
8560 && (XEXP (x, 2) == const0_rtx
8561 || XEXP (x, 2) == const1_rtx)
8562 && COMPARISON_P (XEXP (x, 0))
8563 && COMPARISON_P (XEXP (x, 1)))
8564 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8565 INTVAL (XEXP (x, 2)));
8567 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
8568 if (GET_CODE (x) == AND
8569 && COMPARISON_P (XEXP (x, 0))
8570 && COMPARISON_P (XEXP (x, 1)))
8571 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8574 if (GET_CODE (x) == IOR
8575 && COMPARISON_P (XEXP (x, 0))
8576 && COMPARISON_P (XEXP (x, 1)))
8577 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8580 /* An operation (on Thumb) where we want to test for a single bit.
8581 This is done by shifting that bit up into the top bit of a
8582 scratch register; we can then branch on the sign bit. */
8584 && GET_MODE (x) == SImode
8585 && (op == EQ || op == NE)
8586 && GET_CODE (x) == ZERO_EXTRACT
8587 && XEXP (x, 1) == const1_rtx)
8590 /* For an operation that sets the condition codes as a side-effect, the
8591 V flag is not set correctly, so we can only use comparisons where
8592 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.)
8594 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
8595 if (GET_MODE (x) == SImode
8597 && (op == EQ || op == NE || op == LT || op == GE)
8598 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
8599 || GET_CODE (x) == AND || GET_CODE (x) == IOR
8600 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
8601 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
8602 || GET_CODE (x) == LSHIFTRT
8603 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8604 || GET_CODE (x) == ROTATERT
8605 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
8608 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
8611 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
8612 && GET_CODE (x) == PLUS
8613 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
8619 /* X and Y are two things to compare using CODE. Emit the compare insn and
8620 return the rtx for register 0 in the proper mode. FP means this is a
8621 floating point compare: I don't think that it is needed on the ARM. */
8623 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
8625 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
8626 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
8628 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
8633 /* Generate a sequence of insns that will generate the correct return
8634 address mask depending on the physical architecture that the program is running on. */
8637 arm_gen_return_addr_mask (void)
8639 rtx reg = gen_reg_rtx (Pmode);
8641 emit_insn (gen_return_addr_mask (reg));
8646 arm_reload_in_hi (rtx *operands)
8648 rtx ref = operands[1];
8650 HOST_WIDE_INT offset = 0;
8652 if (GET_CODE (ref) == SUBREG)
8654 offset = SUBREG_BYTE (ref);
8655 ref = SUBREG_REG (ref);
8658 if (GET_CODE (ref) == REG)
8660 /* We have a pseudo which has been spilt onto the stack; there
8661 are two cases here: the first where there is a simple
8662 stack-slot replacement and a second where the stack-slot is
8663 out of range, or is used as a subreg. */
8664 if (reg_equiv_mem[REGNO (ref)])
8666 ref = reg_equiv_mem[REGNO (ref)];
8667 base = find_replacement (&XEXP (ref, 0));
8670 /* The slot is out of range, or was dressed up in a SUBREG. */
8671 base = reg_equiv_address[REGNO (ref)];
8674 base = find_replacement (&XEXP (ref, 0));
8676 /* Handle the case where the address is too complex to be offset by 1. */
8677 if (GET_CODE (base) == MINUS
8678 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8680 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8682 emit_set_insn (base_plus, base);
8685 else if (GET_CODE (base) == PLUS)
8687 /* The addend must be CONST_INT, or we would have dealt with it above. */
8688 HOST_WIDE_INT hi, lo;
8690 offset += INTVAL (XEXP (base, 1));
8691 base = XEXP (base, 0);
8693 /* Rework the address into a legal sequence of insns. */
8694 /* Valid range for lo is -4095 -> 4095 */
8697 : -((-offset) & 0xfff));
8699 /* Corner case, if lo is the max offset then we would be out of range
8700 once we have added the additional 1 below, so bump the msb into the
8701 pre-loading insn(s). */
8705 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8706 ^ (HOST_WIDE_INT) 0x80000000)
8707 - (HOST_WIDE_INT) 0x80000000);
8709 gcc_assert (hi + lo == offset);
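	  /* Worked example (added): offset 0x2345 splits into lo = 0x345
	     and hi = 0x2000; offset -0x2345 splits into lo = -0x345 and
	     hi = -0x2000. In both cases lo fits the 12-bit immediate of a
	     single load/store and hi is added by the addsi3 below. */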
8713 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8715 /* Get the base address; addsi3 knows how to handle constants
8716 that require more than one insn. */
8717 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8723 /* Operands[2] may overlap operands[0] (though it won't overlap
8724 operands[1]), that's why we asked for a DImode reg -- so we can
8725 use the bit that does not overlap. */
8726 if (REGNO (operands[2]) == REGNO (operands[0]))
8727 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8729 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8731 emit_insn (gen_zero_extendqisi2 (scratch,
8732 gen_rtx_MEM (QImode,
8733 plus_constant (base,
8735 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
8736 gen_rtx_MEM (QImode,
8737 plus_constant (base,
8739 if (!BYTES_BIG_ENDIAN)
8740 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8741 gen_rtx_IOR (SImode,
8744 gen_rtx_SUBREG (SImode, operands[0], 0),
8748 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8749 gen_rtx_IOR (SImode,
8750 gen_rtx_ASHIFT (SImode, scratch,
8752 gen_rtx_SUBREG (SImode, operands[0], 0)));
8755 /* Handle storing a half-word to memory during reload by synthesizing as two
8756 byte stores. Take care not to clobber the input values until after we
8757 have moved them somewhere safe. This code assumes that if the DImode
8758 scratch in operands[2] overlaps either the input value or output address
8759 in some way, then that value must die in this insn (we absolutely need
8760 two scratch registers for some corner cases). */
8762 arm_reload_out_hi (rtx *operands)
8764 rtx ref = operands[0];
8765 rtx outval = operands[1];
8767 HOST_WIDE_INT offset = 0;
8769 if (GET_CODE (ref) == SUBREG)
8771 offset = SUBREG_BYTE (ref);
8772 ref = SUBREG_REG (ref);
8775 if (GET_CODE (ref) == REG)
8777 /* We have a pseudo which has been spilt onto the stack; there
8778 are two cases here: the first where there is a simple
8779 stack-slot replacement and a second where the stack-slot is
8780 out of range, or is used as a subreg. */
8781 if (reg_equiv_mem[REGNO (ref)])
8783 ref = reg_equiv_mem[REGNO (ref)];
8784 base = find_replacement (&XEXP (ref, 0));
8787 /* The slot is out of range, or was dressed up in a SUBREG. */
8788 base = reg_equiv_address[REGNO (ref)];
8791 base = find_replacement (&XEXP (ref, 0));
8793 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8795 /* Handle the case where the address is too complex to be offset by 1. */
8796 if (GET_CODE (base) == MINUS
8797 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8799 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8801 /* Be careful not to destroy OUTVAL. */
8802 if (reg_overlap_mentioned_p (base_plus, outval))
8804 /* Updating base_plus might destroy outval, see if we can
8805 swap the scratch and base_plus. */
8806 if (!reg_overlap_mentioned_p (scratch, outval))
8809 scratch = base_plus;
8814 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8816 /* Be conservative and copy OUTVAL into the scratch now,
8817 this should only be necessary if outval is a subreg
8818 of something larger than a word. */
8819 /* XXX Might this clobber base? I can't see how it can,
8820 since scratch is known to overlap with OUTVAL, and
8821 must be wider than a word. */
8822 emit_insn (gen_movhi (scratch_hi, outval));
8823 outval = scratch_hi;
8827 emit_set_insn (base_plus, base);
8830 else if (GET_CODE (base) == PLUS)
8832 /* The addend must be CONST_INT, or we would have dealt with it above. */
8833 HOST_WIDE_INT hi, lo;
8835 offset += INTVAL (XEXP (base, 1));
8836 base = XEXP (base, 0);
8838 /* Rework the address into a legal sequence of insns. */
8839 /* Valid range for lo is -4095 -> 4095 */
8842 : -((-offset) & 0xfff));
8844 /* Corner case, if lo is the max offset then we would be out of range
8845 once we have added the additional 1 below, so bump the msb into the
8846 pre-loading insn(s). */
8850 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8851 ^ (HOST_WIDE_INT) 0x80000000)
8852 - (HOST_WIDE_INT) 0x80000000);
8854 gcc_assert (hi + lo == offset);
8858 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8860 /* Be careful not to destroy OUTVAL. */
8861 if (reg_overlap_mentioned_p (base_plus, outval))
8863 /* Updating base_plus might destroy outval, see if we
8864 can swap the scratch and base_plus. */
8865 if (!reg_overlap_mentioned_p (scratch, outval))
8868 scratch = base_plus;
8873 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8875 /* Be conservative and copy outval into scratch now,
8876 this should only be necessary if outval is a
8877 subreg of something larger than a word. */
8878 /* XXX Might this clobber base? I can't see how it
8879 can, since scratch is known to overlap with
8881 emit_insn (gen_movhi (scratch_hi, outval));
8882 outval = scratch_hi;
8886 /* Get the base address; addsi3 knows how to handle constants
8887 that require more than one insn. */
8888 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8894 if (BYTES_BIG_ENDIAN)
8896 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8897 plus_constant (base, offset + 1)),
8898 gen_lowpart (QImode, outval)));
8899 emit_insn (gen_lshrsi3 (scratch,
8900 gen_rtx_SUBREG (SImode, outval, 0),
8902 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8903 gen_lowpart (QImode, scratch)));
8907 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8908 gen_lowpart (QImode, outval)));
8909 emit_insn (gen_lshrsi3 (scratch,
8910 gen_rtx_SUBREG (SImode, outval, 0),
8912 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8913 plus_constant (base, offset + 1)),
8914 gen_lowpart (QImode, scratch)));
8918 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8919 (padded to the size of a word) should be passed in a register. */
8922 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8924 if (TARGET_AAPCS_BASED)
8925 return must_pass_in_stack_var_size (mode, type);
8927 return must_pass_in_stack_var_size_or_pad (mode, type);
8931 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8932 Return true if an argument passed on the stack should be padded upwards,
8933 i.e. if the least-significant byte has useful data.
8934 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8935 aggregate types are placed in the lowest memory address. */
8938 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8940 if (!TARGET_AAPCS_BASED)
8941 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8943 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8950 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8951 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8952 byte of the register has useful data, and return the opposite if the
8953 most significant byte does.
8954 For AAPCS, small aggregates and small complex types are always padded upwards. */
8958 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8959 tree type, int first ATTRIBUTE_UNUSED)
8961 if (TARGET_AAPCS_BASED
8963 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8964 && int_size_in_bytes (type) <= 4)
8967 /* Otherwise, use default padding. */
8968 return !BYTES_BIG_ENDIAN;
8972 /* Print a symbolic form of X to the debug file, F. */
8974 arm_print_value (FILE *f, rtx x)
8976 switch (GET_CODE (x))
8979 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8983 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8991 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8993 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8994 if (i < (CONST_VECTOR_NUNITS (x) - 1))
9002 fprintf (f, "\"%s\"", XSTR (x, 0));
9006 fprintf (f, "`%s'", XSTR (x, 0));
9010 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
9014 arm_print_value (f, XEXP (x, 0));
9018 arm_print_value (f, XEXP (x, 0));
9020 arm_print_value (f, XEXP (x, 1));
9028 fprintf (f, "????");
9033 /* Routines for manipulation of the constant pool. */
9035 /* Arm instructions cannot load a large constant directly into a
9036 register; they have to come from a pc relative load. The constant
9037 must therefore be placed in the addressable range of the pc
9038 relative load. Depending on the precise pc relative load
9039 instruction the range is somewhere between 256 bytes and 4k. This
9040 means that we often have to dump a constant inside a function, and
9041 generate code to branch around it.
9043 It is important to minimize this, since the branches will slow
9044 things down and make the code larger.
9046 Normally we can hide the table after an existing unconditional
9047 branch so that there is no interruption of the flow, but in the
9048 worst case the code looks like this:
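	(reconstructed sketch -- the original example here was similar,
	 though not necessarily identical)

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long	value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long	value
	L4:
	...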
9066 We fix this by performing a scan after scheduling, which notices
9067 which instructions need to have their operands fetched from the
9068 constant table and builds the table.
9070 The algorithm starts by building a table of all the constants that
9071 need fixing up and all the natural barriers in the function (places
9072 where a constant table can be dropped without breaking the flow).
9073 For each fixup we note how far the pc-relative replacement will be
9074 able to reach and the offset of the instruction into the function.
9076 Having built the table we then group the fixes together to form
9077 tables that are as large as possible (subject to addressing
9078 constraints) and emit each table of constants after the last
9079 barrier that is within range of all the instructions in the group.
9080 If a group does not contain a barrier, then we forcibly create one
9081 by inserting a jump instruction into the flow. Once the table has
9082 been inserted, the insns are then modified to reference the
9083 relevant entry in the pool.
9085 Possible enhancements to the algorithm (not implemented) are:
9087 1) For some processors and object formats, there may be benefit in
9088 aligning the pools to the start of cache lines; this alignment
9089 would need to be taken into account when calculating addressability
9092 /* These typedefs are located at the start of this file, so that
9093 they can be used in the prototypes there. This comment is to
9094 remind readers of that fact so that the following structures
9095 can be understood more easily.
9097 typedef struct minipool_node Mnode;
9098 typedef struct minipool_fixup Mfix; */
9100 struct minipool_node
9102 /* Doubly linked chain of entries. */
9105 /* The maximum offset into the code at which this entry can be placed. While
9106 pushing fixes for forward references, all entries are sorted in order
9107 of increasing max_address. */
9108 HOST_WIDE_INT max_address;
9109 /* Similarly for an entry inserted for a backwards ref. */
9110 HOST_WIDE_INT min_address;
9111 /* The number of fixes referencing this entry. This can become zero
9112 if we "unpush" an entry. In this case we ignore the entry when we
9113 come to emit the code. */
9115 /* The offset from the start of the minipool. */
9116 HOST_WIDE_INT offset;
9117 /* The value in the table. */
9118 rtx value;
9119 /* The mode of the value. */
9120 enum machine_mode mode;
9121 /* The size of the value. With iWMMXt enabled
9122 sizes > 4 also imply an alignment of 8 bytes. */
9123 int fix_size;
9126 struct minipool_fixup
9128 Mfix * next;
9129 rtx insn;
9130 HOST_WIDE_INT address;
9131 rtx * loc;
9132 enum machine_mode mode;
9133 int fix_size;
9134 rtx value;
9135 Mnode * minipool;
9136 HOST_WIDE_INT forwards;
9137 HOST_WIDE_INT backwards;
9140 /* Fixes less than a word need padding out to a word boundary. */
9141 #define MINIPOOL_FIX_SIZE(mode) \
9142 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
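/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */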
9144 static Mnode * minipool_vector_head;
9145 static Mnode * minipool_vector_tail;
9146 static rtx minipool_vector_label;
9147 static int minipool_pad;
9149 /* The linked list of all minipool fixes required for this function. */
9150 Mfix * minipool_fix_head;
9151 Mfix * minipool_fix_tail;
9152 /* The fix entry for the current minipool, once it has been placed. */
9153 Mfix * minipool_barrier;
9155 /* Determines if INSN is the start of a jump table. Returns the end
9156 of the TABLE or NULL_RTX. */
9157 static rtx
9158 is_jump_table (rtx insn)
9160 rtx table;
9162 if (GET_CODE (insn) == JUMP_INSN
9163 && JUMP_LABEL (insn) != NULL
9164 && ((table = next_real_insn (JUMP_LABEL (insn)))
9165 == next_real_insn (insn))
9167 && GET_CODE (table) == JUMP_INSN
9168 && (GET_CODE (PATTERN (table)) == ADDR_VEC
9169 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
9170 return table;
9172 return NULL_RTX;
9175 #ifndef JUMP_TABLES_IN_TEXT_SECTION
9176 #define JUMP_TABLES_IN_TEXT_SECTION 0
9177 #endif
9179 static HOST_WIDE_INT
9180 get_jump_table_size (rtx insn)
9182 /* ADDR_VECs only take room if read-only data goes into the text
9184 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
9186 rtx body = PATTERN (insn);
9187 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
9188 HOST_WIDE_INT size;
9189 HOST_WIDE_INT modesize;
9191 modesize = GET_MODE_SIZE (GET_MODE (body));
9192 size = modesize * XVECLEN (body, elt);
9196 /* Round up size of TBB table to a halfword boundary. */
9197 size = (size + 1) & ~(HOST_WIDE_INT)1;
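/* For example, a TBB table with five one-byte entries occupies 5 bytes
   and is padded here to 6.  */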
9200 /* No padding necessary for TBH. */
9203 /* Add two bytes for alignment on Thumb. */
9216 /* Move a minipool fix MP from its current location to before MAX_MP.
9217 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
9218 constraints may need updating. */
9219 static Mnode *
9220 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
9221 HOST_WIDE_INT max_address)
9223 /* The code below assumes these are different. */
9224 gcc_assert (mp != max_mp);
9228 if (max_address < mp->max_address)
9229 mp->max_address = max_address;
9233 if (max_address > max_mp->max_address - mp->fix_size)
9234 mp->max_address = max_mp->max_address - mp->fix_size;
9236 mp->max_address = max_address;
9238 /* Unlink MP from its current position. Since max_mp is non-null,
9239 mp->prev must be non-null. */
9240 mp->prev->next = mp->next;
9241 if (mp->next != NULL)
9242 mp->next->prev = mp->prev;
9244 minipool_vector_tail = mp->prev;
9246 /* Re-insert it before MAX_MP. */
9248 mp->prev = max_mp->prev;
9251 if (mp->prev != NULL)
9252 mp->prev->next = mp;
9254 minipool_vector_head = mp;
9257 /* Save the new entry. */
9260 /* Scan over the preceding entries and adjust their addresses as
9262 while (mp->prev != NULL
9263 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9265 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9272 /* Add a constant to the minipool for a forward reference. Returns the
9273 node added or NULL if the constant will not fit in this pool. */
9274 static Mnode *
9275 add_minipool_forward_ref (Mfix *fix)
9277 /* If set, max_mp is the first pool_entry that has a lower
9278 constraint than the one we are trying to add. */
9279 Mnode * max_mp = NULL;
9280 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
9283 /* If the minipool starts before the end of FIX->INSN then this FIX
9284 cannot be placed into the current pool. Furthermore, adding the
9285 new constant pool entry may cause the pool to start FIX_SIZE bytes
9287 if (minipool_vector_head
9288 && (fix->address + get_attr_length (fix->insn)
9289 >= minipool_vector_head->max_address - fix->fix_size))
9292 /* Scan the pool to see if a constant with the same value has
9293 already been added. While we are doing this, also note the
9294 location where we must insert the constant if it doesn't already
9295 exist. */
9296 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9298 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9299 && fix->mode == mp->mode
9300 && (GET_CODE (fix->value) != CODE_LABEL
9301 || (CODE_LABEL_NUMBER (fix->value)
9302 == CODE_LABEL_NUMBER (mp->value)))
9303 && rtx_equal_p (fix->value, mp->value))
9305 /* More than one fix references this entry. */
9307 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
9310 /* Note the insertion point if necessary. */
9312 && mp->max_address > max_address)
9315 /* If we are inserting an 8-byte aligned quantity and
9316 we have not already found an insertion point, then
9317 make sure that all such 8-byte aligned quantities are
9318 placed at the start of the pool. */
9319 if (ARM_DOUBLEWORD_ALIGN
9321 && fix->fix_size >= 8
9322 && mp->fix_size < 8)
9325 max_address = mp->max_address;
9329 /* The value is not currently in the minipool, so we need to create
9330 a new entry for it. If MAX_MP is NULL, the entry will be put on
9331 the end of the list since the placement is less constrained than
9332 any existing entry. Otherwise, we insert the new fix before
9333 MAX_MP and, if necessary, adjust the constraints on the other
9334 entries. */
9335 mp = XNEW (Mnode);
9336 mp->fix_size = fix->fix_size;
9337 mp->mode = fix->mode;
9338 mp->value = fix->value;
9340 /* Not yet required for a backwards ref. */
9341 mp->min_address = -65536;
9345 mp->max_address = max_address;
9347 mp->prev = minipool_vector_tail;
9349 if (mp->prev == NULL)
9351 minipool_vector_head = mp;
9352 minipool_vector_label = gen_label_rtx ();
9355 mp->prev->next = mp;
9357 minipool_vector_tail = mp;
9361 if (max_address > max_mp->max_address - mp->fix_size)
9362 mp->max_address = max_mp->max_address - mp->fix_size;
9364 mp->max_address = max_address;
9367 mp->prev = max_mp->prev;
9369 if (mp->prev != NULL)
9370 mp->prev->next = mp;
9372 minipool_vector_head = mp;
9375 /* Save the new entry. */
9378 /* Scan over the preceding entries and adjust their addresses as
9380 while (mp->prev != NULL
9381 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9383 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9390 static Mnode *
9391 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
9392 HOST_WIDE_INT min_address)
9394 HOST_WIDE_INT offset;
9396 /* The code below assumes these are different. */
9397 gcc_assert (mp != min_mp);
9401 if (min_address > mp->min_address)
9402 mp->min_address = min_address;
9406 /* We will adjust this below if it is too loose. */
9407 mp->min_address = min_address;
9409 /* Unlink MP from its current position. Since min_mp is non-null,
9410 mp->next must be non-null. */
9411 mp->next->prev = mp->prev;
9412 if (mp->prev != NULL)
9413 mp->prev->next = mp->next;
9415 minipool_vector_head = mp->next;
9417 /* Reinsert it after MIN_MP. */
9419 mp->next = min_mp->next;
9421 if (mp->next != NULL)
9422 mp->next->prev = mp;
9424 minipool_vector_tail = mp;
9430 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9432 mp->offset = offset;
9433 if (mp->refcount > 0)
9434 offset += mp->fix_size;
9436 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
9437 mp->next->min_address = mp->min_address + mp->fix_size;
9443 /* Add a constant to the minipool for a backward reference. Returns the
9444 node added or NULL if the constant will not fit in this pool.
9446 Note that the code for insertion for a backwards reference can be
9447 somewhat confusing because the calculated offsets for each fix do
9448 not take into account the size of the pool (which is still under
9449 construction). */
9450 static Mnode *
9451 add_minipool_backward_ref (Mfix *fix)
9453 /* If set, min_mp is the last pool_entry that has a lower constraint
9454 than the one we are trying to add. */
9455 Mnode *min_mp = NULL;
9456 /* This can be negative, since it is only a constraint. */
9457 HOST_WIDE_INT min_address = fix->address - fix->backwards;
9460 /* If we can't reach the current pool from this insn, or if we can't
9461 insert this entry at the end of the pool without pushing other
9462 fixes out of range, then we don't try. This ensures that we
9463 can't fail later on. */
9464 if (min_address >= minipool_barrier->address
9465 || (minipool_vector_tail->min_address + fix->fix_size
9466 >= minipool_barrier->address))
9469 /* Scan the pool to see if a constant with the same value has
9470 already been added. While we are doing this, also note the
9471 location where we must insert the constant if it doesn't already
9472 exist. */
9473 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
9475 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9476 && fix->mode == mp->mode
9477 && (GET_CODE (fix->value) != CODE_LABEL
9478 || (CODE_LABEL_NUMBER (fix->value)
9479 == CODE_LABEL_NUMBER (mp->value)))
9480 && rtx_equal_p (fix->value, mp->value)
9481 /* Check that there is enough slack to move this entry to the
9482 end of the table (this is conservative). */
9484 > (minipool_barrier->address
9485 + minipool_vector_tail->offset
9486 + minipool_vector_tail->fix_size)))
9489 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
9493 mp->min_address += fix->fix_size;
9496 /* Note the insertion point if necessary. */
9497 if (mp->min_address < min_address)
9499 /* For now, we do not allow the insertion of nodes requiring 8-byte
9500 alignment anywhere but at the start of the pool. */
9501 if (ARM_DOUBLEWORD_ALIGN
9502 && fix->fix_size >= 8 && mp->fix_size < 8)
9507 else if (mp->max_address
9508 < minipool_barrier->address + mp->offset + fix->fix_size)
9510 /* Inserting before this entry would push the fix beyond
9511 its maximum address (which can happen if we have
9512 re-located a forwards fix); force the new fix to come
9513 after it. */
9514 if (ARM_DOUBLEWORD_ALIGN
9515 && fix->fix_size >= 8 && mp->fix_size < 8)
9520 min_address = mp->min_address + fix->fix_size;
9523 /* Do not insert a non-8-byte aligned quantity before 8-byte
9524 aligned quantities. */
9525 else if (ARM_DOUBLEWORD_ALIGN
9526 && fix->fix_size < 8
9527 && mp->fix_size >= 8)
9530 min_address = mp->min_address + fix->fix_size;
9535 /* We need to create a new entry. */
9536 mp = XNEW (Mnode);
9537 mp->fix_size = fix->fix_size;
9538 mp->mode = fix->mode;
9539 mp->value = fix->value;
9541 mp->max_address = minipool_barrier->address + 65536;
9543 mp->min_address = min_address;
9548 mp->next = minipool_vector_head;
9550 if (mp->next == NULL)
9552 minipool_vector_tail = mp;
9553 minipool_vector_label = gen_label_rtx ();
9556 mp->next->prev = mp;
9558 minipool_vector_head = mp;
9562 mp->next = min_mp->next;
9566 if (mp->next != NULL)
9567 mp->next->prev = mp;
9569 minipool_vector_tail = mp;
9572 /* Save the new entry. */
9580 /* Scan over the following entries and adjust their offsets. */
9581 while (mp->next != NULL)
9583 if (mp->next->min_address < mp->min_address + mp->fix_size)
9584 mp->next->min_address = mp->min_address + mp->fix_size;
9587 mp->next->offset = mp->offset + mp->fix_size;
9589 mp->next->offset = mp->offset;
9597 static void
9598 assign_minipool_offsets (Mfix *barrier)
9600 HOST_WIDE_INT offset = 0;
9603 minipool_barrier = barrier;
9605 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9607 mp->offset = offset;
9609 if (mp->refcount > 0)
9610 offset += mp->fix_size;
9614 /* Output the literal table. */
9615 static void
9616 dump_minipool (rtx scan)
9618 Mnode * mp;
9619 Mnode * nmp;
9620 int align64 = 0;
9622 if (ARM_DOUBLEWORD_ALIGN)
9623 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9624 if (mp->refcount > 0 && mp->fix_size >= 8)
9632 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
9633 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
9635 scan = emit_label_after (gen_label_rtx (), scan);
9636 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
9637 scan = emit_label_after (minipool_vector_label, scan);
9639 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
9641 if (mp->refcount > 0)
9646 ";; Offset %u, min %ld, max %ld ",
9647 (unsigned) mp->offset, (unsigned long) mp->min_address,
9648 (unsigned long) mp->max_address);
9649 arm_print_value (dump_file, mp->value);
9650 fputc ('\n', dump_file);
9653 switch (mp->fix_size)
9655 #ifdef HAVE_consttable_1
9657 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
9661 #ifdef HAVE_consttable_2
9663 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
9667 #ifdef HAVE_consttable_4
9669 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
9673 #ifdef HAVE_consttable_8
9675 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
9679 #ifdef HAVE_consttable_16
9681 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
9694 minipool_vector_head = minipool_vector_tail = NULL;
9695 scan = emit_insn_after (gen_consttable_end (), scan);
9696 scan = emit_barrier_after (scan);
9699 /* Return the cost of forcibly inserting a barrier after INSN. */
9700 static int
9701 arm_barrier_cost (rtx insn)
9703 /* Basing the location of the pool on the loop depth is preferable,
9704 but at the moment, the basic block information seems to be
9705 corrupt by this stage of the compilation. */
9707 rtx next = next_nonnote_insn (insn);
9709 if (next != NULL && GET_CODE (next) == CODE_LABEL)
9712 switch (GET_CODE (insn))
9715 /* It will always be better to place the table before the label, rather
9724 return base_cost - 10;
9727 return base_cost + 10;
9731 /* Find the best place in the insn stream in the range
9732 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
9733 Create the barrier by inserting a jump and add a new fix entry for
9734 it. */
9735 static Mfix *
9736 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
9738 HOST_WIDE_INT count = 0;
9740 rtx from = fix->insn;
9741 /* The instruction after which we will insert the jump. */
9742 rtx selected = NULL;
9744 /* The address at which the jump instruction will be placed. */
9745 HOST_WIDE_INT selected_address;
9747 HOST_WIDE_INT max_count = max_address - fix->address;
9748 rtx label = gen_label_rtx ();
9750 selected_cost = arm_barrier_cost (from);
9751 selected_address = fix->address;
9753 while (from && count < max_count)
9758 /* This code shouldn't have been called if there was a natural barrier
9760 gcc_assert (GET_CODE (from) != BARRIER);
9762 /* Count the length of this insn. */
9763 count += get_attr_length (from);
9765 /* If there is a jump table, add its length. */
9766 tmp = is_jump_table (from);
9769 count += get_jump_table_size (tmp);
9771 /* Jump tables aren't in a basic block, so base the cost on
9772 the dispatch insn. If we select this location, we will
9773 still put the pool after the table. */
9774 new_cost = arm_barrier_cost (from);
9776 if (count < max_count
9777 && (!selected || new_cost <= selected_cost))
9780 selected_cost = new_cost;
9781 selected_address = fix->address + count;
9784 /* Continue after the dispatch table. */
9785 from = NEXT_INSN (tmp);
9789 new_cost = arm_barrier_cost (from);
9791 if (count < max_count
9792 && (!selected || new_cost <= selected_cost))
9795 selected_cost = new_cost;
9796 selected_address = fix->address + count;
9799 from = NEXT_INSN (from);
9802 /* Make sure that we found a place to insert the jump. */
9803 gcc_assert (selected);
9805 /* Create a new JUMP_INSN that branches around a barrier. */
9806 from = emit_jump_insn_after (gen_jump (label), selected);
9807 JUMP_LABEL (from) = label;
9808 barrier = emit_barrier_after (from);
9809 emit_label_after (label, barrier);
9811 /* Create a minipool barrier entry for the new barrier. */
9812 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9813 new_fix->insn = barrier;
9814 new_fix->address = selected_address;
9815 new_fix->next = fix->next;
9816 fix->next = new_fix;
9821 /* Record that there is a natural barrier in the insn stream at
9822 ADDRESS. */
9823 static void
9824 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9826 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9829 fix->address = address;
9832 if (minipool_fix_head != NULL)
9833 minipool_fix_tail->next = fix;
9835 minipool_fix_head = fix;
9837 minipool_fix_tail = fix;
9840 /* Record INSN, which will need fixing up to load a value from the
9841 minipool. ADDRESS is the offset of the insn since the start of the
9842 function; LOC is a pointer to the part of the insn which requires
9843 fixing; VALUE is the constant that must be loaded, which is of type
9844 MODE. */
9845 static void
9846 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9847 enum machine_mode mode, rtx value)
9849 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9852 fix->address = address;
9855 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9857 fix->forwards = get_attr_pool_range (insn);
9858 fix->backwards = get_attr_neg_pool_range (insn);
9859 fix->minipool = NULL;
9861 /* If an insn doesn't have a range defined for it, then it isn't
9862 expecting to be reworked by this code. Better to stop now than
9863 to generate duff assembly code. */
9864 gcc_assert (fix->forwards || fix->backwards);
9866 /* If an entry requires 8-byte alignment then assume all constant pools
9867 require 4 bytes of padding. Trying to do this later on a per-pool
9868 basis is awkward because existing pool entries have to be modified. */
9869 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9875 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9876 GET_MODE_NAME (mode),
9877 INSN_UID (insn), (unsigned long) address,
9878 -1 * (long)fix->backwards, (long)fix->forwards);
9879 arm_print_value (dump_file, fix->value);
9880 fprintf (dump_file, "\n");
9883 /* Add it to the chain of fixes. */
9886 if (minipool_fix_head != NULL)
9887 minipool_fix_tail->next = fix;
9889 minipool_fix_head = fix;
9891 minipool_fix_tail = fix;
9894 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9895 Returns the number of insns needed, or 99 if we don't know how to
9896 generate it. */
9897 int
9898 arm_const_double_inline_cost (rtx val)
9900 rtx lowpart, highpart;
9901 enum machine_mode mode;
9903 mode = GET_MODE (val);
9905 if (mode == VOIDmode)
9908 gcc_assert (GET_MODE_SIZE (mode) == 8);
9910 lowpart = gen_lowpart (SImode, val);
9911 highpart = gen_highpart_mode (SImode, mode, val);
9913 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9914 gcc_assert (GET_CODE (highpart) == CONST_INT);
9916 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9917 NULL_RTX, NULL_RTX, 0, 0)
9918 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9919 NULL_RTX, NULL_RTX, 0, 0));
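/* For example, a DImode value whose 32-bit halves are 0 and 1 costs 2 by
   the sum above (one insn per half); values needing several
   data-processing insns per half cost correspondingly more.  This is an
   illustration, not a table of exact costs.  */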
9922 /* Return true if it is worthwhile to split a 64-bit constant into two
9923 32-bit operations. This is the case if optimizing for size, or
9924 if we have load delay slots, or if one 32-bit part can be done with
9925 a single data operation. */
9926 bool
9927 arm_const_double_by_parts (rtx val)
9929 enum machine_mode mode = GET_MODE (val);
9932 if (optimize_size || arm_ld_sched)
9935 if (mode == VOIDmode)
9938 part = gen_highpart_mode (SImode, mode, val);
9940 gcc_assert (GET_CODE (part) == CONST_INT);
9942 if (const_ok_for_arm (INTVAL (part))
9943 || const_ok_for_arm (~INTVAL (part)))
9946 part = gen_lowpart (SImode, val);
9948 gcc_assert (GET_CODE (part) == CONST_INT);
9950 if (const_ok_for_arm (INTVAL (part))
9951 || const_ok_for_arm (~INTVAL (part)))
9957 /* Scan INSN and note any of its operands that need fixing.
9958 If DO_PUSHES is false we do not actually push any of the fixups
9959 needed. The function returns TRUE if any fixups were needed/pushed.
9960 This is used by arm_memory_load_p() which needs to know about loads
9961 of constants that will be converted into minipool loads. */
9962 static bool
9963 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9965 bool result = false;
9968 extract_insn (insn);
9970 if (!constrain_operands (1))
9971 fatal_insn_not_found (insn);
9973 if (recog_data.n_alternatives == 0)
9976 /* Fill in recog_op_alt with information about the constraints of
9978 preprocess_constraints ();
9980 for (opno = 0; opno < recog_data.n_operands; opno++)
9982 /* Things we need to fix can only occur in inputs. */
9983 if (recog_data.operand_type[opno] != OP_IN)
9986 /* If this alternative is a memory reference, then any mention
9987 of constants in this alternative is really to fool reload
9988 into allowing us to accept one there. We need to fix them up
9989 now so that we output the right code. */
9990 if (recog_op_alt[opno][which_alternative].memory_ok)
9992 rtx op = recog_data.operand[opno];
9994 if (CONSTANT_P (op))
9996 if (do_pushes)
9997 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9998 recog_data.operand_mode[opno], op);
9999 result = true;
10001 else if (GET_CODE (op) == MEM
10002 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
10003 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
10005 if (do_pushes)
10007 rtx cop = avoid_constant_pool_reference (op);
10009 /* Casting the address of something to a mode narrower
10010 than a word can cause avoid_constant_pool_reference()
10011 to return the pool reference itself. That's no good to
10012 us here. Let's just hope that we can use the
10013 constant pool value directly. */
10014 if (op == cop)
10015 cop = get_pool_constant (XEXP (op, 0));
10017 push_minipool_fix (insn, address,
10018 recog_data.operand_loc[opno],
10019 recog_data.operand_mode[opno], cop);
10021 result = true;
10026 return result;
10030 /* GCC puts the pool in the wrong place for ARM, since we can only
10031 load addresses a limited distance around the pc. We do some
10032 special munging to move the constant pool values to the correct
10033 point in the code. */
10035 static void
10036 arm_reorg (void)
10038 HOST_WIDE_INT address = 0;
10039 rtx insn;
10040 Mfix * fix;
10041 minipool_fix_head = minipool_fix_tail = NULL;
10043 /* The first insn must always be a note, or the code below won't
10044 scan it properly. */
10045 insn = get_insns ();
10046 gcc_assert (GET_CODE (insn) == NOTE);
10049 /* Scan all the insns and record the operands that will need fixing. */
10050 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
10052 if (TARGET_CIRRUS_FIX_INVALID_INSNS
10053 && (arm_cirrus_insn_p (insn)
10054 || GET_CODE (insn) == JUMP_INSN
10055 || arm_memory_load_p (insn)))
10056 cirrus_reorg (insn);
10058 if (GET_CODE (insn) == BARRIER)
10059 push_minipool_barrier (insn, address);
10060 else if (INSN_P (insn))
10064 note_invalid_constants (insn, address, true);
10065 address += get_attr_length (insn);
10067 /* If the insn is a vector jump, add the size of the table
10068 and skip the table. */
10069 if ((table = is_jump_table (insn)) != NULL)
10071 address += get_jump_table_size (table);
10077 fix = minipool_fix_head;
10079 /* Now scan the fixups and perform the required changes. */
10080 while (fix)
10084 Mfix * last_added_fix;
10085 Mfix * last_barrier = NULL;
10088 /* Skip any further barriers before the next fix. */
10089 while (fix && GET_CODE (fix->insn) == BARRIER)
10090 fix = fix->next;
10092 /* No more fixes. */
10093 if (fix == NULL)
10094 break;
10096 last_added_fix = NULL;
10098 for (ftmp = fix; ftmp; ftmp = ftmp->next)
10100 if (GET_CODE (ftmp->insn) == BARRIER)
10102 if (ftmp->address >= minipool_vector_head->max_address)
10103 break;
10105 last_barrier = ftmp;
10107 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
10108 break;
10110 last_added_fix = ftmp; /* Keep track of the last fix added. */
10113 /* If we found a barrier, drop back to that; any fixes that we
10114 could have reached but come after the barrier will now go in
10115 the next mini-pool. */
10116 if (last_barrier != NULL)
10118 /* Reduce the refcount for those fixes that won't go into this
10120 for (fdel = last_barrier->next;
10121 fdel && fdel != ftmp;
10122 fdel = fdel->next)
10124 fdel->minipool->refcount--;
10125 fdel->minipool = NULL;
10128 ftmp = last_barrier;
10132 /* ftmp is the first fix that we can't fit into this pool and
10133 there are no natural barriers that we could use. Insert a
10134 new barrier in the code somewhere between the previous
10135 fix and this one, and arrange to jump around it. */
10136 HOST_WIDE_INT max_address;
10138 /* The last item on the list of fixes must be a barrier, so
10139 we can never run off the end of the list of fixes without
10140 last_barrier being set. */
10141 gcc_assert (ftmp);
10143 max_address = minipool_vector_head->max_address;
10144 /* Check that there isn't another fix that is in range that
10145 we couldn't fit into this pool because the pool was
10146 already too large: we need to put the pool before such an
10147 instruction. The pool itself may come just after the
10148 fix because create_fix_barrier also allows space for a
10149 jump instruction. */
10150 if (ftmp->address < max_address)
10151 max_address = ftmp->address + 1;
10153 last_barrier = create_fix_barrier (last_added_fix, max_address);
10156 assign_minipool_offsets (last_barrier);
10160 if (GET_CODE (ftmp->insn) != BARRIER
10161 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
10168 /* Scan over the fixes we have identified for this pool, fixing them
10169 up and adding the constants to the pool itself. */
10170 for (this_fix = fix; this_fix && ftmp != this_fix;
10171 this_fix = this_fix->next)
10172 if (GET_CODE (this_fix->insn) != BARRIER)
10174 rtx addr
10175 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
10176 minipool_vector_label),
10177 this_fix->minipool->offset);
10178 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
10181 dump_minipool (last_barrier->insn);
10185 /* From now on we must synthesize any constants that we can't handle
10186 directly. This can happen if the RTL gets split during final
10187 instruction generation. */
10188 after_arm_reorg = 1;
10190 /* Free the minipool memory. */
10191 obstack_free (&minipool_obstack, minipool_startobj);
10194 /* Routines to output assembly language. */
10196 /* If the rtx is the correct value then return the string of the number.
10197 In this way we can ensure that valid double constants are generated even
10198 when cross compiling. */
10199 const char *
10200 fp_immediate_constant (rtx x)
10205 if (!fp_consts_inited)
10206 init_fp_table ();
10208 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10209 for (i = 0; i < 8; i++)
10210 if (REAL_VALUES_EQUAL (r, values_fp[i]))
10211 return strings_fp[i];
10213 gcc_unreachable ();
10216 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
10217 static const char *
10218 fp_const_from_val (REAL_VALUE_TYPE *r)
10222 if (!fp_consts_inited)
10223 init_fp_table ();
10225 for (i = 0; i < 8; i++)
10226 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
10227 return strings_fp[i];
10229 gcc_unreachable ();
10232 /* Output the operands of a LDM/STM instruction to STREAM.
10233 MASK is the ARM register set mask of which only bits 0-15 are important.
10234 REG is the base register, either the frame pointer or the stack pointer,
10235 INSTR is the possibly suffixed load or store instruction.
10236 RFE is nonzero if the instruction should also copy spsr to cpsr. */
10238 static void
10239 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
10240 unsigned long mask, int rfe)
10243 bool not_first = FALSE;
10245 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
10246 fputc ('\t', stream);
10247 asm_fprintf (stream, instr, reg);
10248 fputc ('{', stream);
10250 for (i = 0; i <= LAST_ARM_REGNUM; i++)
10251 if (mask & (1 << i))
10254 fprintf (stream, ", ");
10256 asm_fprintf (stream, "%r", i);
10261 fprintf (stream, "}^\n");
10263 fprintf (stream, "}\n");
10267 /* Output a FLDMD instruction to STREAM.
10268 BASE is the register containing the address.
10269 REG and COUNT specify the register range.
10270 Extra registers may be added to avoid hardware bugs.
10272 We output FLDMD even for ARMv5 VFP implementations. Although
10273 FLDMD is technically not supported until ARMv6, it is believed
10274 that all VFP implementations support its use in this context. */
10276 static void
10277 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
10281 /* Workaround ARM10 VFPr1 bug. */
10282 if (count == 2 && !arm_arch6)
10284 if (reg == 15)
10285 reg--;
10286 count++;
10289 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
10290 load into multiple parts if we have to handle more than 16 registers. */
10292 if (count > 16)
10293 vfp_output_fldmd (stream, base, reg, 16);
10294 vfp_output_fldmd (stream, base, reg + 16, count - 16);
10295 return;
10298 fputc ('\t', stream);
10299 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
10301 for (i = reg; i < reg + count; i++)
10304 fputs (", ", stream);
10305 asm_fprintf (stream, "d%d", i);
10307 fputs ("}\n", stream);
10312 /* Output the assembly for a store multiple. */
10314 const char *
10315 vfp_output_fstmd (rtx * operands)
10322 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
10323 p = strlen (pattern);
10325 gcc_assert (GET_CODE (operands[1]) == REG);
10327 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
10328 for (i = 1; i < XVECLEN (operands[2], 0); i++)
10330 p += sprintf (&pattern[p], ", d%d", base + i);
10332 strcpy (&pattern[p], "}");
10334 output_asm_insn (pattern, operands);
10339 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
10340 number of bytes pushed. */
10342 static int
10343 vfp_emit_fstmd (int base_reg, int count)
10350 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
10351 register pairs are stored by a store multiple insn. We avoid this
10352 by pushing an extra pair. */
10353 if (count == 2 && !arm_arch6)
10355 if (base_reg == LAST_VFP_REGNUM - 3)
10360 /* FSTMD may not store more than 16 doubleword registers at once. Split
10361 larger stores into multiple parts (up to a maximum of two, in
10366 /* NOTE: base_reg is an internal register number, so each D register
10368 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
10369 saved += vfp_emit_fstmd (base_reg, 16);
10373 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
10374 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
10376 reg = gen_rtx_REG (DFmode, base_reg);
10379 XVECEXP (par, 0, 0)
10380 = gen_rtx_SET (VOIDmode,
10381 gen_frame_mem (BLKmode,
10382 gen_rtx_PRE_DEC (BLKmode,
10383 stack_pointer_rtx)),
10384 gen_rtx_UNSPEC (BLKmode,
10385 gen_rtvec (1, reg),
10386 UNSPEC_PUSH_MULT));
10388 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10389 plus_constant (stack_pointer_rtx, -(count * 8)));
10390 RTX_FRAME_RELATED_P (tmp) = 1;
10391 XVECEXP (dwarf, 0, 0) = tmp;
10393 tmp = gen_rtx_SET (VOIDmode,
10394 gen_frame_mem (DFmode, stack_pointer_rtx),
10396 RTX_FRAME_RELATED_P (tmp) = 1;
10397 XVECEXP (dwarf, 0, 1) = tmp;
10399 for (i = 1; i < count; i++)
10401 reg = gen_rtx_REG (DFmode, base_reg);
10403 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
10405 tmp = gen_rtx_SET (VOIDmode,
10406 gen_frame_mem (DFmode,
10407 plus_constant (stack_pointer_rtx,
10410 RTX_FRAME_RELATED_P (tmp) = 1;
10411 XVECEXP (dwarf, 0, i + 1) = tmp;
10414 par = emit_insn (par);
10415 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
10416 RTX_FRAME_RELATED_P (par) = 1;
10418 return count * 8;
10421 /* Emit a call instruction with pattern PAT. ADDR is the address of
10422 the call target. */
10424 void
10425 arm_emit_call_insn (rtx pat, rtx addr)
10429 insn = emit_call_insn (pat);
10431 /* The PIC register is live on entry to VxWorks PIC PLT entries.
10432 If the call might use such an entry, add a use of the PIC register
10433 to the instruction's CALL_INSN_FUNCTION_USAGE. */
10434 if (TARGET_VXWORKS_RTP
10436 && GET_CODE (addr) == SYMBOL_REF
10437 && (SYMBOL_REF_DECL (addr)
10438 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
10439 : !SYMBOL_REF_LOCAL_P (addr)))
10441 require_pic_register ();
10442 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
10446 /* Output a 'call' insn. */
10447 const char *
10448 output_call (rtx *operands)
10450 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
10452 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
10453 if (REGNO (operands[0]) == LR_REGNUM)
10455 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
10456 output_asm_insn ("mov%?\t%0, %|lr", operands);
10459 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10461 if (TARGET_INTERWORK || arm_arch4t)
10462 output_asm_insn ("bx%?\t%0", operands);
10464 output_asm_insn ("mov%?\t%|pc, %0", operands);
10469 /* Output a 'call' insn that is a reference in memory. */
10470 const char *
10471 output_call_mem (rtx *operands)
10473 if (TARGET_INTERWORK && !arm_arch5)
10475 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10476 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10477 output_asm_insn ("bx%?\t%|ip", operands);
10479 else if (regno_use_in (LR_REGNUM, operands[0]))
10481 /* LR is used in the memory address. We load the address in the
10482 first instruction. It's safe to use IP as the target of the
10483 load since the call will kill it anyway. */
10484 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10486 output_asm_insn ("blx%?\t%|ip", operands);
10489 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10491 output_asm_insn ("bx%?\t%|ip", operands);
10493 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
10498 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10499 output_asm_insn ("ldr%?\t%|pc, %0", operands);
10506 /* Output a move from ARM registers to an FPA register.
10507 OPERANDS[0] is an FPA register.
10508 OPERANDS[1] is the first register of an ARM register pair. */
10509 const char *
10510 output_mov_long_double_fpa_from_arm (rtx *operands)
10512 int arm_reg0 = REGNO (operands[1]);
10515 gcc_assert (arm_reg0 != IP_REGNUM);
10517 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10518 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10519 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10521 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10522 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
10527 /* Output a move from an FPA register to ARM registers.
10528 OPERANDS[0] is the first register of an ARM register pair.
10529 OPERANDS[1] is an FPA register. */
10530 const char *
10531 output_mov_long_double_arm_from_fpa (rtx *operands)
10533 int arm_reg0 = REGNO (operands[0]);
10536 gcc_assert (arm_reg0 != IP_REGNUM);
10538 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10539 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10540 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10542 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
10543 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10547 /* Output a move from ARM registers to ARM registers of a long double.
10548 OPERANDS[0] is the destination.
10549 OPERANDS[1] is the source. */
10550 const char *
10551 output_mov_long_double_arm_from_arm (rtx *operands)
10553 /* We have to be careful here because the two might overlap. */
10554 int dest_start = REGNO (operands[0]);
10555 int src_start = REGNO (operands[1]);
10559 if (dest_start < src_start)
10561 for (i = 0; i < 3; i++)
10563 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10564 ops[1] = gen_rtx_REG (SImode, src_start + i);
10565 output_asm_insn ("mov%?\t%0, %1", ops);
10570 for (i = 2; i >= 0; i--)
10572 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10573 ops[1] = gen_rtx_REG (SImode, src_start + i);
10574 output_asm_insn ("mov%?\t%0, %1", ops);
10582 /* Emit a MOVW/MOVT pair. */
10583 void arm_emit_movpair (rtx dest, rtx src)
10585 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
10586 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
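/* For a symbolic SImode constant this expands to the usual pair (a
   sketch; the exact assembly comes from the move patterns matching the
   HIGH and LO_SUM sets above):

	movw	rd, #:lower16:sym
	movt	rd, #:upper16:sym  */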
10590 /* Output a move from ARM registers to an FPA register.
10591 OPERANDS[0] is an FPA register.
10592 OPERANDS[1] is the first register of an ARM register pair. */
10593 const char *
10594 output_mov_double_fpa_from_arm (rtx *operands)
10596 int arm_reg0 = REGNO (operands[1]);
10599 gcc_assert (arm_reg0 != IP_REGNUM);
10601 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10602 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10603 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
10604 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
10608 /* Output a move from an FPA register to ARM registers.
10609 OPERANDS[0] is the first register of an ARM register pair.
10610 OPERANDS[1] is an FPA register. */
10611 const char *
10612 output_mov_double_arm_from_fpa (rtx *operands)
10614 int arm_reg0 = REGNO (operands[0]);
10617 gcc_assert (arm_reg0 != IP_REGNUM);
10619 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10620 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10621 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
10622 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
10626 /* Output a move between double words.
10627 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
10628 or MEM<-REG and all MEMs must be offsettable addresses. */
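/* For example, a DImode load from a plain register address typically
   emits (illustrative register choices)

	ldrd	r0, [r2]

   when TARGET_LDRD, and otherwise

	ldmia	r2, {r0, r1}

   as the REG case below shows.  */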
10629 const char *
10630 output_move_double (rtx *operands)
10632 enum rtx_code code0 = GET_CODE (operands[0]);
10633 enum rtx_code code1 = GET_CODE (operands[1]);
10638 unsigned int reg0 = REGNO (operands[0]);
10640 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10642 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
10644 switch (GET_CODE (XEXP (operands[1], 0)))
10648 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
10649 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
10651 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10655 gcc_assert (TARGET_LDRD);
10656 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
10661 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
10663 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
10668 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
10670 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
10674 gcc_assert (TARGET_LDRD);
10675 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
10680 /* Autoincrement addressing modes should never have overlapping
10681 base and destination registers, and overlapping index registers
10682 are already prohibited, so this doesn't need to worry about
10683 fix_cm3_ldrd. */
10684 otherops[0] = operands[0];
10685 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
10686 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
10688 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
10690 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10692 /* Registers overlap so split out the increment. */
10693 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10694 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
10698 /* Use a single insn if we can.
10699 FIXME: IWMMXT allows offsets larger than ldrd can
10700 handle, fix these up with a pair of ldr. */
10702 || GET_CODE (otherops[2]) != CONST_INT
10703 || (INTVAL (otherops[2]) > -256
10704 && INTVAL (otherops[2]) < 256))
10705 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
10708 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10709 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10715 /* Use a single insn if we can.
10716 FIXME: IWMMXT allows offsets larger than ldrd can handle,
10717 fix these up with a pair of ldr. */
10719 || GET_CODE (otherops[2]) != CONST_INT
10720 || (INTVAL (otherops[2]) > -256
10721 && INTVAL (otherops[2]) < 256))
10722 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
10725 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10726 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10733 /* We might be able to use ldrd %0, %1 here. However the range is
10734 different to ldr/adr, and it is broken on some ARMv7-M
10735 implementations. */
10736 /* Use the second register of the pair to avoid problematic
10738 otherops[1] = operands[1];
10739 output_asm_insn ("adr%?\t%0, %1", otherops);
10740 operands[1] = otherops[0];
10742 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10744 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
10747 /* ??? This needs checking for thumb2. */
10749 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
10750 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
10752 otherops[0] = operands[0];
10753 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
10754 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
10756 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10758 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10760 switch ((int) INTVAL (otherops[2]))
10763 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10768 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10773 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10777 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
10778 operands[1] = otherops[0];
10780 && (GET_CODE (otherops[2]) == REG
10782 || (GET_CODE (otherops[2]) == CONST_INT
10783 && INTVAL (otherops[2]) > -256
10784 && INTVAL (otherops[2]) < 256)))
10786 if (reg_overlap_mentioned_p (operands[0],
10790 /* Swap base and index registers over to
10791 avoid a conflict. */
10793 otherops[1] = otherops[2];
10796 /* If both registers conflict, it will usually
10797 have been fixed by a splitter. */
10798 if (reg_overlap_mentioned_p (operands[0], otherops[2])
10799 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
10801 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10802 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10806 otherops[0] = operands[0];
10807 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10812 if (GET_CODE (otherops[2]) == CONST_INT)
10814 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10815 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10817 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10820 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10823 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10826 return "ldr%(d%)\t%0, [%1]";
10828 return "ldm%(ia%)\t%1, %M0";
10832 otherops[1] = adjust_address (operands[1], SImode, 4);
10833 /* Take care of overlapping base/data reg. */
10834 if (reg_mentioned_p (operands[0], operands[1]))
10836 output_asm_insn ("ldr%?\t%0, %1", otherops);
10837 output_asm_insn ("ldr%?\t%0, %1", operands);
10841 output_asm_insn ("ldr%?\t%0, %1", operands);
10842 output_asm_insn ("ldr%?\t%0, %1", otherops);
10849 /* Constraints should ensure this. */
10850 gcc_assert (code0 == MEM && code1 == REG);
10851 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10853 switch (GET_CODE (XEXP (operands[0], 0)))
10857 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10859 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10863 gcc_assert (TARGET_LDRD);
10864 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10869 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10871 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10876 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10878 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10882 gcc_assert (TARGET_LDRD);
10883 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10888 otherops[0] = operands[1];
10889 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10890 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10892 /* IWMMXT allows offsets larger than ldrd can handle,
10893 fix these up with a pair of ldr. */
10895 && GET_CODE (otherops[2]) == CONST_INT
10896 && (INTVAL(otherops[2]) <= -256
10897 || INTVAL(otherops[2]) >= 256))
10899 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10901 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10902 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10906 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10907 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10910 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10911 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10913 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10917 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10918 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10920 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10923 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10929 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10935 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10940 && (GET_CODE (otherops[2]) == REG
10942 || (GET_CODE (otherops[2]) == CONST_INT
10943 && INTVAL (otherops[2]) > -256
10944 && INTVAL (otherops[2]) < 256)))
10946 otherops[0] = operands[1];
10947 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10948 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10954 otherops[0] = adjust_address (operands[0], SImode, 4);
10955 otherops[1] = operands[1];
10956 output_asm_insn ("str%?\t%1, %0", operands);
10957 output_asm_insn ("str%?\t%H1, %0", otherops);
10964 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10965 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
10967 const char *
10968 output_move_quad (rtx *operands)
10970 if (REG_P (operands[0]))
10972 /* Load, or reg->reg move. */
10974 if (MEM_P (operands[1]))
10976 switch (GET_CODE (XEXP (operands[1], 0)))
10979 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10984 output_asm_insn ("adr%?\t%0, %1", operands);
10985 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10989 gcc_unreachable ();
10997 gcc_assert (REG_P (operands[1]));
10999 dest = REGNO (operands[0]);
11000 src = REGNO (operands[1]);
11002 /* This seems pretty dumb, but hopefully GCC won't try to do it
11003 very often. */
11005 for (i = 0; i < 4; i++)
11007 ops[0] = gen_rtx_REG (SImode, dest + i);
11008 ops[1] = gen_rtx_REG (SImode, src + i);
11009 output_asm_insn ("mov%?\t%0, %1", ops);
11012 for (i = 3; i >= 0; i--)
11014 ops[0] = gen_rtx_REG (SImode, dest + i);
11015 ops[1] = gen_rtx_REG (SImode, src + i);
11016 output_asm_insn ("mov%?\t%0, %1", ops);
11022 gcc_assert (MEM_P (operands[0]));
11023 gcc_assert (REG_P (operands[1]));
11024 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
11026 switch (GET_CODE (XEXP (operands[0], 0)))
11029 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
11033 gcc_unreachable ();
11040 /* Output a VFP load or store instruction. */
11042 const char *
11043 output_move_vfp (rtx *operands)
11045 rtx reg, mem, addr, ops[2];
11046 int load = REG_P (operands[0]);
11047 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
11048 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
11051 enum machine_mode mode;
11053 reg = operands[!load];
11054 mem = operands[load];
11056 mode = GET_MODE (reg);
11058 gcc_assert (REG_P (reg));
11059 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
11060 gcc_assert (mode == SFmode
11064 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
11065 gcc_assert (MEM_P (mem));
11067 addr = XEXP (mem, 0);
11069 switch (GET_CODE (addr))
11072 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
11073 ops[0] = XEXP (addr, 0);
11078 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
11079 ops[0] = XEXP (addr, 0);
11084 templ = "f%s%c%%?\t%%%s0, %%1%s";
11090 sprintf (buff, templ,
11091 load ? "ld" : "st",
11094 integer_p ? "\t%@ int" : "");
11095 output_asm_insn (buff, ops);
11100 /* Output a Neon quad-word load or store, or a load or store for
11101 larger structure modes.
11103 WARNING: The ordering of elements is weird in big-endian mode,
11104 because we use VSTM, as required by the EABI. GCC RTL defines
11105 element ordering based on in-memory order. This can differ
11106 from the architectural ordering of elements within a NEON register.
11107 The intrinsics defined in arm_neon.h use the NEON register element
11108 ordering, not the GCC RTL element ordering.
11110 For example, the in-memory ordering of a big-endian quadword
11111 vector with 16-bit elements when stored from register pair {d0,d1}
11112 will be (lowest address first, d0[N] is NEON register element N):
11114 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
11116 When necessary, quadword registers (dN, dN+1) are moved to ARM
11117 registers from rN in the order:
11119 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
11121 So that STM/LDM can be used on vectors in ARM registers, and the
11122 same memory layout will result as if VSTM/VLDM were used. */
11124 const char *
11125 output_move_neon (rtx *operands)
11127 rtx reg, mem, addr, ops[2];
11128 int regno, load = REG_P (operands[0]);
11131 enum machine_mode mode;
11133 reg = operands[!load];
11134 mem = operands[load];
11136 mode = GET_MODE (reg);
11138 gcc_assert (REG_P (reg));
11139 regno = REGNO (reg);
11140 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
11141 || NEON_REGNO_OK_FOR_QUAD (regno));
11142 gcc_assert (VALID_NEON_DREG_MODE (mode)
11143 || VALID_NEON_QREG_MODE (mode)
11144 || VALID_NEON_STRUCT_MODE (mode));
11145 gcc_assert (MEM_P (mem));
11147 addr = XEXP (mem, 0);
11149 /* Strip off const from addresses like (const (plus (...))). */
11150 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
11151 addr = XEXP (addr, 0);
11153 switch (GET_CODE (addr))
11156 templ = "v%smia%%?\t%%0!, %%h1";
11157 ops[0] = XEXP (addr, 0);
11162 /* FIXME: We should be using vld1/vst1 here in BE mode? */
11163 templ = "v%smdb%%?\t%%0!, %%h1";
11164 ops[0] = XEXP (addr, 0);
11169 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
11170 gcc_unreachable ();
11175 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
11178 for (i = 0; i < nregs; i++)
11180 /* We're only using DImode here because it's a convenient size. */
11181 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
11182 ops[1] = adjust_address (mem, SImode, 8 * i);
11183 if (reg_overlap_mentioned_p (ops[0], mem))
11185 gcc_assert (overlap == -1);
11190 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11191 output_asm_insn (buff, ops);
11196 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
11197 ops[1] = adjust_address (mem, SImode, 8 * overlap);
11198 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11199 output_asm_insn (buff, ops);
11206 templ = "v%smia%%?\t%%m0, %%h1";
11211 sprintf (buff, templ, load ? "ld" : "st");
11212 output_asm_insn (buff, ops);
11217 /* Output an ADD r, s, #n where n may be too big for one instruction.
11218 If adding zero to one register, output nothing. */
11219 const char *
11220 output_add_immediate (rtx *operands)
11222 HOST_WIDE_INT n = INTVAL (operands[2]);
11224 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
11227 output_multi_immediate (operands,
11228 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
11231 output_multi_immediate (operands,
11232 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
11239 /* Output a multiple immediate operation.
11240 OPERANDS is the vector of operands referred to in the output patterns.
11241 INSTR1 is the output pattern to use for the first constant.
11242 INSTR2 is the output pattern to use for subsequent constants.
11243 IMMED_OP is the index of the constant slot in OPERANDS.
11244 N is the constant value. */
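/* For example, with N = 0x10001 and suitable add patterns this emits
   (illustrative operands)

	add	r0, r1, #1
	add	r0, r0, #65536

   one instruction per non-zero 8-bit chunk of N.  */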
11245 static const char *
11246 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
11247 int immed_op, HOST_WIDE_INT n)
11249 #if HOST_BITS_PER_WIDE_INT > 32
11255 /* Quick and easy output. */
11256 operands[immed_op] = const0_rtx;
11257 output_asm_insn (instr1, operands);
11262 const char * instr = instr1;
11264 /* Note that n is never zero here (which would give no output). */
11265 for (i = 0; i < 32; i += 2)
11269 operands[immed_op] = GEN_INT (n & (255 << i));
11270 output_asm_insn (instr, operands);
11280 /* Return the name of a shifter operation. */
11281 static const char *
11282 arm_shift_nmem (enum rtx_code code)
11287 return ARM_LSL_NAME;
11303 /* Return the appropriate ARM instruction for the operation code.
11304 The returned result should not be overwritten. OP is the rtx of the
11305 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
11306 was shifted. */
11307 static const char *
11308 arithmetic_instr (rtx op, int shift_first_arg)
11310 switch (GET_CODE (op))
11316 return shift_first_arg ? "rsb" : "sub";
11331 return arm_shift_nmem (GET_CODE (op));
11334 gcc_unreachable ();
11338 /* Ensure valid constant shifts and return the appropriate shift mnemonic
11339 for the operation code. The returned result should not be overwritten.
11340 OP is the rtx code of the shift.
11341 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
11343 static const char *
11344 shift_op (rtx op, HOST_WIDE_INT *amountp)
11347 enum rtx_code code = GET_CODE (op);
11349 switch (GET_CODE (XEXP (op, 1)))
11357 *amountp = INTVAL (XEXP (op, 1));
11361 gcc_unreachable ();
11367 gcc_assert (*amountp != -1);
11368 *amountp = 32 - *amountp;
11371 /* Fall through. */
11377 mnem = arm_shift_nmem (code);
11381 /* We never have to worry about the amount being other than a
11382 power of 2, since this case can never be reloaded from a reg. */
11383 gcc_assert (*amountp != -1);
11384 *amountp = int_log2 (*amountp);
11385 return ARM_LSL_NAME;
11388 gcc_unreachable ();
11391 if (*amountp != -1)
11393 /* This is not 100% correct, but follows from the desire to merge
11394 multiplication by a power of 2 with the recognizer for a
11395 shift. >=32 is not a valid shift for "lsl", so we must try and
11396 output a shift that produces the correct arithmetical result.
11397 Using lsr #32 is identical except for the fact that the carry bit
11398 is not set correctly if we set the flags; but we never use the
11399 carry bit from such an operation, so we can ignore that. */
11400 if (code == ROTATERT)
11401 /* Rotate is just modulo 32. */
11403 else if (*amountp != (*amountp & 31))
11405 if (code == ASHIFT)
11410 /* Shifts of 0 are no-ops. */
11418 /* Obtain the shift from the POWER of two. */
11420 static HOST_WIDE_INT
11421 int_log2 (HOST_WIDE_INT power)
11423 HOST_WIDE_INT shift = 0;
11425 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
11427 gcc_assert (shift <= 31);
11434 /* Output a .ascii pseudo-op, keeping track of lengths. This is
11435 because /bin/as is horribly restrictive. The judgement about
11436 whether or not each character is 'printable' (and can be output as
11437 is) or not (and must be printed with an octal escape) must be made
11438 with reference to the *host* character set -- the situation is
11439 similar to that discussed in the comments above pp_c_char in
11440 c-pretty-print.c. */
11442 #define MAX_ASCII_LEN 51
11444 void
11445 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
11448 int len_so_far = 0;
11450 fputs ("\t.ascii\t\"", stream);
11452 for (i = 0; i < len; i++)
11456 if (len_so_far >= MAX_ASCII_LEN)
11458 fputs ("\"\n\t.ascii\t\"", stream);
11464 if (c == '\\' || c == '\"')
11466 putc ('\\', stream);
11474 fprintf (stream, "\\%03o", c);
11479 fputs ("\"\n", stream);
11482 /* Compute the register save mask for registers 0 through 12
11483 inclusive. This code is used by arm_compute_save_reg_mask. */
11485 static unsigned long
11486 arm_compute_save_reg0_reg12_mask (void)
11488 unsigned long func_type = arm_current_func_type ();
11489 unsigned long save_reg_mask = 0;
11492 if (IS_INTERRUPT (func_type))
11494 unsigned int max_reg;
11495 /* Interrupt functions must not corrupt any registers,
11496 even call clobbered ones. If this is a leaf function
11497 we can just examine the registers used by the RTL, but
11498 otherwise we have to assume that whatever function is
11499 called might clobber anything, and so we have to save
11500 all the call-clobbered registers as well. */
11501 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
11502 /* FIQ handlers have registers r8 - r12 banked, so
11503 we only need to check r0 - r7. Normal ISRs only
11504 bank r14 and r15, so we must check up to r12.
11505 r13 is the stack pointer which is always preserved,
11506 so we do not need to consider it here. */
11507 max_reg = 7;
11508 else
11509 max_reg = 12;
11511 for (reg = 0; reg <= max_reg; reg++)
11512 if (df_regs_ever_live_p (reg)
11513 || (! current_function_is_leaf && call_used_regs[reg]))
11514 save_reg_mask |= (1 << reg);
11516 /* Also save the pic base register if necessary. */
11517 if (flag_pic
11518 && !TARGET_SINGLE_PIC_BASE
11519 && arm_pic_register != INVALID_REGNUM
11520 && crtl->uses_pic_offset_table)
11521 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11525 /* In the normal case we only need to save those registers
11526 which are call saved and which are used by this function. */
11527 for (reg = 0; reg <= 11; reg++)
11528 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
11529 save_reg_mask |= (1 << reg);
11531 /* Handle the frame pointer as a special case. */
11532 if (frame_pointer_needed)
11533 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
11535 /* If we aren't loading the PIC register,
11536 don't stack it even though it may be live. */
11537 if (flag_pic
11538 && !TARGET_SINGLE_PIC_BASE
11539 && arm_pic_register != INVALID_REGNUM
11540 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
11541 || crtl->uses_pic_offset_table))
11542 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11544 /* The prologue will copy SP into R0, so save it. */
11545 if (IS_STACKALIGN (func_type))
11546 save_reg_mask |= 1;
11549 /* Save registers so the exception handler can modify them. */
11550 if (crtl->calls_eh_return)
11556 reg = EH_RETURN_DATA_REGNO (i);
11557 if (reg == INVALID_REGNUM)
11559 save_reg_mask |= 1 << reg;
11563 return save_reg_mask;
11567 /* Compute the number of bytes used to store the static chain register on the
11568 stack, above the stack frame. We need to know this accurately to get the
11569 alignment of the rest of the stack frame correct. */
11571 static int
11572 arm_compute_static_chain_stack_bytes (void)
11573 unsigned long func_type = arm_current_func_type ();
11574 int static_chain_stack_bytes = 0;
11576 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
11577 && IS_NESTED (func_type)
11578 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
11579 static_chain_stack_bytes = 4;
11581 return static_chain_stack_bytes;
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  /* Volatile functions do not return, so there
     is no need to save any other registers.  */
  if (IS_VOLATILE (func_type))
    return save_reg_mask;

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
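/* Illustrative sizing: if d8-d11 are live across calls, the loop above
   counts a run of four doubleword registers, contributing 32 bytes.  A
   run of exactly two registers on a pre-v6 core is padded to three
   (24 bytes) by the ARM10 VFPr1 workaround.  */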
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  */
const char *
output_return_instruction (rtx operand, int really_return, int reverse)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |= (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;
		  sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  really_return = 0;
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
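/* Illustrative output: for a function whose only saved registers are
   r4 and LR, the code above typically emits the single instruction

	ldmfd	sp!, {r4, pc}

   loading the saved LR straight into the PC; when interworking on an
   older core requires returning with bx, LR is reloaded instead and a
   separate bx lr is emitted (a sketch; the exact form depends on the
   live register mask computed above).  */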
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
	 arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location, whose length is ((pc[-3]) & ~0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx           x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  if (TARGET_THUMB1)
    {
      thumb1_output_function_prologue (f, frame_size);
      return;
    }

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
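/* For a typical function the block emitted above reads, e.g.
   (illustrative values):

	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0  */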
const char *
arm_output_epilogue (rtx sibling)
{
  int reg;
  unsigned long saved_regs_mask;
  unsigned long func_type;
  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
     frame that is $fp + 4 for a non-variadic function.  */
  int floats_offset = 0;
  rtx operands[3];
  FILE * f = asm_out_file;
  unsigned int lrm_count = 0;
  int really_return = (sibling == NULL);
  int start_reg;
  arm_stack_offsets *offsets;

  /* If we have already generated the return instruction
     then it is futile to generate anything else.  */
  if (use_return_insn (FALSE, sibling)
      && (cfun->machine->return_used_this_function != 0))
    return "";

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    /* Naked functions don't have epilogues.  */
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      rtx op;

      /* A volatile function should never return.  Call abort.  */
      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
      assemble_external_libcall (op);
      output_asm_insn ("bl\t%a0", &op);

      return "";
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  if (TARGET_IWMMXT)
    lrm_count = bit_count (saved_regs_mask);

  floats_offset = offsets->saved_args;
  /* Compute how far away the floats will be.  */
  for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
    if (saved_regs_mask & (1 << reg))
      floats_offset += 4;
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
      int vfp_offset = offsets->frame;

      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
	{
	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      {
		floats_offset += 12;
		asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
			     reg, FP_REGNUM, floats_offset - vfp_offset);
	      }
	}
      else
	{
	  start_reg = LAST_FPA_REGNUM;

	  for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	    {
	      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
		{
		  floats_offset += 12;

		  /* We can't unstack more than four registers at once.  */
		  if (start_reg - reg == 3)
		    {
		      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
				   reg, FP_REGNUM, floats_offset - vfp_offset);
		      start_reg = reg - 1;
		    }
		}
	      else
		{
		  if (reg != start_reg)
		    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
				 reg + 1, start_reg - reg,
				 FP_REGNUM, floats_offset - vfp_offset);
		  start_reg = reg - 1;
		}
	    }

	  /* Just in case the last register checked also needs unstacking.  */
	  if (reg != start_reg)
	    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
			 reg + 1, start_reg - reg,
			 FP_REGNUM, floats_offset - vfp_offset);
	}
      if (TARGET_HARD_FLOAT && TARGET_VFP)
	{
	  int saved_size;

	  /* The fldmd insns do not have base+offset addressing
	     modes, so we use IP to hold the address.  */
	  saved_size = arm_get_vfp_saved_size ();

	  if (saved_size > 0)
	    {
	      floats_offset += saved_size;
	      asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
			   FP_REGNUM, floats_offset - vfp_offset);
	    }
	  start_reg = FIRST_VFP_REGNUM;
	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	    {
	      if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
		  && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
		{
		  if (start_reg != reg)
		    vfp_output_fldmd (f, IP_REGNUM,
				      (start_reg - FIRST_VFP_REGNUM) / 2,
				      (reg - start_reg) / 2);
		  start_reg = reg + 2;
		}
	    }
	  if (start_reg != reg)
	    vfp_output_fldmd (f, IP_REGNUM,
			      (start_reg - FIRST_VFP_REGNUM) / 2,
			      (reg - start_reg) / 2);
	}

      if (TARGET_IWMMXT)
	{
	  /* The frame pointer is guaranteed to be non-double-word aligned.
	     This is because it is set to (old_stack_pointer - 4) and the
	     old_stack_pointer was double word aligned.  Thus the offset to
	     the iWMMXt registers to be loaded must also be non-double-word
	     sized, so that the resultant address *is* double-word aligned.
	     We can ignore floats_offset since that was already included in
	     the live_regs_mask.  */
	  lrm_count += (lrm_count % 2 ? 2 : 1);

	  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      {
		asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
			     reg, FP_REGNUM, lrm_count * 4);
		lrm_count += 2;
	      }
	}
      /* saved_regs_mask should contain the IP, which at the time of stack
	 frame generation actually contains the old stack pointer.  So a
	 quick way to unwind the stack is just pop the IP register directly
	 into the stack pointer.  */
      gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
      saved_regs_mask &= ~ (1 << IP_REGNUM);
      saved_regs_mask |= (1 << SP_REGNUM);

      /* There are two registers left in saved_regs_mask - LR and PC.  We
	 only need to restore the LR register (the return address), but to
	 save time we can load it directly into the PC, unless we need a
	 special function exit sequence, or we are not really returning.  */
      if (really_return
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return)
	/* Delete the LR from the register mask, so that the LR on
	   the stack is loaded into the PC in the register mask.  */
	saved_regs_mask &= ~ (1 << LR_REGNUM);
      else
	saved_regs_mask &= ~ (1 << PC_REGNUM);

      /* We must use SP as the base register, because SP is one of the
	 registers being restored.  If an interrupt or page fault
	 happens in the ldm instruction, the SP might or might not
	 have been restored.  That would be bad, as then SP will no
	 longer indicate the safe area of stack, and we can get stack
	 corruption.  Using SP as the base register means that it will
	 be reset correctly to the original value, should an interrupt
	 occur.  If the stack pointer already points at the right
	 place, then omit the subtraction.  */
      if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
	  || cfun->calls_alloca)
	asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
		     4 * bit_count (saved_regs_mask));
      print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);

      if (IS_INTERRUPT (func_type))
	/* Interrupt handlers will have pushed the
	   IP onto the stack, so restore it now.  */
	print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
    }
  else
    {
      /* This branch is executed for ARM mode (non-apcs frames) and
	 Thumb-2 mode.  Frame layout is essentially the same for those
	 cases, except that in ARM mode the frame pointer points to the
	 first saved register, while in Thumb-2 mode the frame pointer
	 points to the last saved register.

	 It is possible to make the frame pointer point to the last saved
	 register in both cases, and remove some conditionals below.
	 That means that fp setup in prologue would be just "mov fp, sp"
	 and sp restore in epilogue would be just "mov sp, fp", whereas
	 now we have to use add/sub in those cases.  However, the value
	 of that would be marginal, as both mov and add/sub are 32-bit
	 in ARM mode, and it would require extra conditionals
	 in arm_expand_prologue to distinguish the ARM-apcs-frame case
	 (where the frame pointer is required to point at the first
	 register) from ARM-non-apcs-frame.  Therefore, such a change is
	 postponed until a real need arises.  */
      unsigned HOST_WIDE_INT amount;
      int rfe;

      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM && frame_pointer_needed)
	{
	  operands[0] = stack_pointer_rtx;
	  operands[1] = hard_frame_pointer_rtx;

	  operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
	  output_add_immediate (operands);
	}
      else
	{
	  if (frame_pointer_needed)
	    {
	      /* For Thumb-2 restore sp from the frame pointer.
		 Operand restrictions mean we have to increment FP, then copy
		 to SP.  */
	      amount = offsets->locals_base - offsets->saved_regs;
	      operands[0] = hard_frame_pointer_rtx;
	    }
	  else
	    {
	      unsigned long count;
	      operands[0] = stack_pointer_rtx;
	      amount = offsets->outgoing_args - offsets->saved_regs;
	      /* Pop call-clobbered registers if it avoids a
		 separate stack adjustment.  */
	      count = offsets->saved_regs - offsets->saved_args;
	      if (optimize_size
		  && count != 0
		  && !crtl->calls_eh_return
		  && bit_count(saved_regs_mask) * 4 == count
		  && !IS_INTERRUPT (func_type)
		  && !crtl->tail_call_emit)
		{
		  unsigned long mask;
		  mask = (1 << (arm_size_return_regs() / 4)) - 1;
		  mask ^= 0xf;
		  mask &= ~saved_regs_mask;
		  reg = 0;
		  while (bit_count (mask) * 4 > amount)
		    {
		      while ((mask & (1 << reg)) == 0)
			reg++;
		      mask &= ~(1 << reg);
		    }
		  if (bit_count (mask) * 4 == amount)
		    {
		      amount = 0;
		      saved_regs_mask |= mask;
		    }
		}
	    }

	  if (amount)
	    {
	      operands[1] = operands[0];
	      operands[2] = GEN_INT (amount);
	      output_add_immediate (operands);
	    }
	  if (frame_pointer_needed)
	    asm_fprintf (f, "\tmov\t%r, %r\n",
			 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
	}
      if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
	{
	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
	    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	      asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
			   reg, SP_REGNUM);
	}
      else
	{
	  start_reg = FIRST_FPA_REGNUM;

	  for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
	    {
	      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
		{
		  if (reg - start_reg == 3)
		    {
		      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
				   start_reg, SP_REGNUM);
		      start_reg = reg + 1;
		    }
		}
	      else
		{
		  if (reg != start_reg)
		    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
				 start_reg, reg - start_reg,
				 SP_REGNUM);

		  start_reg = reg + 1;
		}
	    }

	  /* Just in case the last register checked also needs unstacking.  */
	  if (reg != start_reg)
	    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
			 start_reg, reg - start_reg, SP_REGNUM);
	}

      if (TARGET_HARD_FLOAT && TARGET_VFP)
	{
	  start_reg = FIRST_VFP_REGNUM;
	  for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	    {
	      if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
		  && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
		{
		  if (start_reg != reg)
		    vfp_output_fldmd (f, SP_REGNUM,
				      (start_reg - FIRST_VFP_REGNUM) / 2,
				      (reg - start_reg) / 2);
		  start_reg = reg + 2;
		}
	    }
	  if (start_reg != reg)
	    vfp_output_fldmd (f, SP_REGNUM,
			      (start_reg - FIRST_VFP_REGNUM) / 2,
			      (reg - start_reg) / 2);
	}
      if (TARGET_IWMMXT)
	for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
	  if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	    asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
      /* If we can, restore the LR into the PC.  */
      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
	  && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
	  && !IS_STACKALIGN (func_type)
	  && really_return
	  && crtl->args.pretend_args_size == 0
	  && saved_regs_mask & (1 << LR_REGNUM)
	  && !crtl->calls_eh_return)
	{
	  saved_regs_mask &= ~ (1 << LR_REGNUM);
	  saved_regs_mask |= (1 << PC_REGNUM);
	  rfe = IS_INTERRUPT (func_type);
	}
      else
	rfe = 0;

      /* Load the registers off the stack.  If we only have one register
	 to load use the LDR instruction - it is faster.  For Thumb-2
	 always use pop and the assembler will pick the best instruction.  */
      if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
	  && !IS_INTERRUPT(func_type))
	{
	  asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
	}
      else if (saved_regs_mask)
	{
	  if (saved_regs_mask & (1 << SP_REGNUM))
	    /* Note - write back to the stack register is not enabled
	       (i.e. "ldmfd sp!...").  We know that the stack pointer is
	       in the list of registers and if we add writeback the
	       instruction becomes UNPREDICTABLE.  */
	    print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else if (TARGET_ARM)
	    print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
			     rfe);
	  else
	    print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
	}

      if (crtl->args.pretend_args_size)
	{
	  /* Unwind the pre-pushed regs.  */
	  operands[0] = operands[1] = stack_pointer_rtx;
	  operands[2] = GEN_INT (crtl->args.pretend_args_size);
	  output_add_immediate (operands);
	}
    }
  /* We may have already restored PC directly from the stack.  */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
    return "";

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
		 ARM_EH_STACKADJ_REGNUM);

  /* Generate the return instruction.  */
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    case ARM_FT_ISR:
    case ARM_FT_FIQ:
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      break;

    default:
      if (IS_STACKALIGN (func_type))
	{
	  /* See comment in arm_expand_prologue.  */
	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
	}
      if (arm_arch5 || arm_arch4t)
	asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      else
	asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;
    }

  return "";
}
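/* Illustrative APCS-frame epilogue produced by the code above for a
   function that saved {fp, ip, lr, pc} in its prologue:

	sub	sp, fp, #12
	ldmfd	sp, {fp, sp, pc}

   where the saved IP slot is reloaded into SP and the saved LR slot
   directly into PC (a sketch; details vary with the saved mask).  */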
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			      HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
	 of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
	{
	  rtx label = cfun->machine->call_via[regno];

	  if (label != NULL)
	    {
	      switch_to_section (function_section (current_function_decl));
	      targetm.asm_out.internal_label (asm_out_file, "L",
					      CODE_LABEL_NUMBER (label));
	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	    }
	}

      /* ??? Probably not safe to set this here, since it assumes that a
	 function will be emitted as assembly immediately after we generate
	 RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
		  || (cfun->machine->return_used_this_function != 0)
		  || offsets->saved_regs == offsets->outgoing_args
		  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
    }
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.  The insn looks
     something like this:

       (parallel [
           (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI 11 fp))
           (use (reg:SI 12 ip))
           (use (reg:SI 14 lr))
           (use (reg:SI 15 pc))
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
        ])

     This sequence is used both by the code to support stack unwinding for
     exception handlers and the code to generate dwarf2 frame debugging.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, 0)
	    = gen_rtx_SET (VOIDmode,
			   gen_frame_mem (BLKmode,
					  gen_rtx_PRE_DEC (BLKmode,
							   stack_pointer_rtx)),
			   gen_rtx_UNSPEC (BLKmode,
					   gen_rtvec (1, reg),
					   UNSPEC_PUSH_MULT));

	  if (i != PC_REGNUM)
	    {
	      tmp = gen_rtx_SET (VOIDmode,
				 gen_frame_mem (SImode, stack_pointer_rtx),
				 reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
	      dwarf_par_index++;
	    }

	  break;
	}
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
	{
	  reg = gen_rtx_REG (SImode, i);

	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

	  if (i != PC_REGNUM)
	    {
	      tmp
		= gen_rtx_SET (VOIDmode,
			       gen_frame_mem (SImode,
					      plus_constant (stack_pointer_rtx,
							     4 * j)),
			       reg);
	      RTX_FRAME_RELATED_P (tmp) = 1;
	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	    }

	  j++;
	}
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
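/* For example, a function returning a 64-bit (DImode) value in r0/r1
   yields 8 here, while one returning a plain int in r0 yields 4.  */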
/* Emit a push_multi-style insn that stores COUNT FPA registers,
   starting at BASE_REG, with a DWARF note describing each store.  */
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (XFmode, base_reg++);

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem (BLKmode,
				  gen_rtx_PRE_DEC (BLKmode,
						   stack_pointer_rtx)),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));
  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (XFmode, base_reg++);
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (XFmode,
					plus_constant (stack_pointer_rtx,
						       i * 12)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  tmp = gen_rtx_SET (VOIDmode,
		     stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -12 * count));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!leaf_function_p ()
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | saved arguments for
                            |    | vararg functions
                             ----
   hard FP & arg pointer -> |    | stack
                            |    | frame
                             ----
                            |    | call saved
                            |    | registers
      soft frame pointer -> |    |
                             ----
                            |    | local
                            |    | variables
     locals base pointer -> |    |
                             ----
                            |    | outgoing
                            |    | arguments
   current stack pointer -> |    |
                             ----

   For a given function some or all of these stack components
   may not be needed, giving rise to the possibility of
   eliminating some of the registers.

   The values returned by this function must reflect the behavior
   of arm_expand_prologue () and arm_compute_save_reg_mask ().

   The sign of the number returned reflects the direction of stack
   growth, so the values are positive for all eliminations except
   from the soft frame pointer to the hard frame pointer.

   SFP may point just inside the local variables block to ensure correct
   alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     Fortunately, we only need to know about leaf functions once reload
     has completed, and the frame size cannot be changed after that time,
     so we can safely use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
                   arm_compute_static_chain_stack_bytes();

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
	 preserve that condition at any subroutine call.  We also require the
	 soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
	{
	  /* Check for the call-saved iWMMXt registers.  */
	  for (regno = FIRST_IWMMXT_REGNUM;
	       regno <= LAST_IWMMXT_REGNUM;
	       regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 8;
	}

      func_type = arm_current_func_type ();
      if (! IS_VOLATILE (func_type))
	{
	  /* Space for saved FPA registers.  */
	  for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	      saved += 12;

	  /* Space for saved VFP registers.  */
	  if (TARGET_HARD_FLOAT && TARGET_VFP)
	    saved += arm_get_vfp_saved_size ();
	}
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
	saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs = offsets->saved_args + saved +
                        arm_compute_static_chain_stack_bytes();
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
	 when there is a stack frame as the alignment will be rolled into
	 the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
	{
	  int reg = -1;

	  /* If it is safe to use r3, then do so.  This sometimes
	     generates better code on Thumb-2 by avoiding the need to
	     use 32-bit push/pop instructions.  */
	  if (!crtl->tail_call_emit
	      && arm_size_return_regs () <= 12)
	    {
	      reg = 3;
	    }
	  else
	    for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
	      {
		if ((offsets->saved_regs_mask & (1 << i)) == 0)
		  {
		    reg = i;
		    break;
		  }
	      }

	  if (reg != -1)
	    {
	      offsets->saved_regs += 4;
	      offsets->saved_regs_mask |= (1 << reg);
	    }
	}
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
			    + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
	offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
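/* Worked example (illustrative, assuming no caller-interworking slot):
   an ARM function that saves r4-r6 and LR (16 bytes) and has 20 bytes
   of locals gets saved_args = 0, saved_regs = 16, soft_frame = 16,
   locals_base = 36, and outgoing_args rounded from 36 up to 40 so that
   SP stays doubleword aligned.  */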
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case FRAME_POINTER_REGNUM:
	  /* This is the reverse of the soft frame pointer
	     to hard frame pointer elimination below.  */
	  return offsets->soft_frame - offsets->saved_args;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* This is only non-zero in the case where the static chain register
	     is stored above the frame.  */
	  return offsets->frame - offsets->saved_args - 4;

	case STACK_POINTER_REGNUM:
	  /* If nothing has been pushed on the stack at all
	     then this will return -4.  This *is* correct!  */
	  return offsets->outgoing_args - (offsets->saved_args + 4);

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
	{
	case THUMB_HARD_FRAME_POINTER_REGNUM:
	  return 0;

	case ARM_HARD_FRAME_POINTER_REGNUM:
	  /* The hard frame pointer points to the top entry in the
	     stack frame.  The soft frame pointer to the bottom entry
	     in the stack frame.  If there is no stack frame at all,
	     then they are identical.  */
	  return offsets->frame - offsets->soft_frame;

	case STACK_POINTER_REGNUM:
	  return offsets->outgoing_args - offsets->soft_frame;

	default:
	  gcc_unreachable ();
	}
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
	 In theory you could eliminate from the hard frame
	 pointer to the stack pointer, but this will never
	 happen, since if a stack frame is not needed the
	 hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
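/* For instance, with the example offsets worked out above
   (saved_args = 0, outgoing_args = 40), eliminating ARG_POINTER_REGNUM
   to STACK_POINTER_REGNUM yields 40 - (0 + 4) = 36.  */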
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
	insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
	insn = gen_rtx_MEM (V2SImode, insn);
	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
	RTX_FRAME_RELATED_P (insn) = 1;
	saved_size += 8;
      }

  /* Save any floating point call-saved registers used by this
     function.  */
  if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
    {
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	  {
	    insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
	    insn = gen_rtx_MEM (XFmode, insn);
	    insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
	    RTX_FRAME_RELATED_P (insn) = 1;
	    saved_size += 12;
	  }
    }
  else
    {
      start_reg = LAST_FPA_REGNUM;

      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
	{
	  if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	    {
	      if (start_reg - reg == 3)
		{
		  insn = emit_sfm (reg, 4);
		  RTX_FRAME_RELATED_P (insn) = 1;
		  saved_size += 48;
		  start_reg = reg - 1;
		}
	    }
	  else
	    {
	      if (start_reg != reg)
		{
		  insn = emit_sfm (reg + 1, start_reg - reg);
		  RTX_FRAME_RELATED_P (insn) = 1;
		  saved_size += (start_reg - reg) * 12;
		}
	      start_reg = reg - 1;
	    }
	}

      if (start_reg != reg)
	{
	  insn = emit_sfm (reg + 1, start_reg - reg);
	  saved_size += (start_reg - reg) * 12;
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
	{
	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
	    {
	      if (start_reg != reg)
		saved_size += vfp_emit_fstmd (start_reg,
					      (reg - start_reg) / 2);
	      start_reg = reg + 2;
	    }
	}
      if (start_reg != reg)
	saved_size += vfp_emit_fstmd (start_reg,
				      (reg - start_reg) / 2);
    }
  return saved_size;
}
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
				  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
	 expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx,
					hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					hard_frame_pointer_rtx,
					stack_pointer_rtx));
	}
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			   plus_constant (stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
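/* In the common small-frame case this materializes as a single
   instruction, e.g. "add r7, sp, #8" when r7 is the Thumb soft frame
   pointer and the outgoing arguments occupy 8 bytes (illustrative).  */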
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx dwarf;
      rtx r0;
      rtx r1;
      /* Handle a word-aligned stack pointer.  We generate the following:

	  mov r0, sp
	  bic r1, r0, #7
	  mov sp, r1
	  <save and restore r0 in normal prologue/epilogue>
	  mov sp, r0
	  bx lr

	 The unwinder doesn't need to know about the stack realignment.
	 Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);
      /* Use a real rtvec rather than NULL_RTVEC so the rest of the
	 compiler won't choke.  */
      dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
      dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
      insn = gen_movsi (r0, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      emit_insn (insn);
      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }
  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
	{
	  /* Interrupt functions must not corrupt any registers.
	     Creating a frame pointer however, corrupts the IP
	     register, so we must push it first.  */
	  insn = emit_multi_reg_push (1 << IP_REGNUM);

	  /* Do not set RTX_FRAME_RELATED_P on this insn.
	     The dwarf stack unwinding code only wants to see one
	     stack decrement per function, and this is not it.  If
	     this instruction is labeled as being part of the frame
	     creation sequence then dwarf2out_frame_debug_expr will
	     die when it encounters the assignment of IP to FP
	     later on, since the use of SP here establishes SP as
	     the CFA register and not IP.

	     Anyway this instruction is not really part of the stack
	     frame creation although it is part of the prologue.  */
	}
      else if (IS_NESTED (func_type))
	{
	  /* The static chain register is the same as the IP register
	     used as a scratch register during stack frame creation.
	     To get around this we need to find somewhere to store IP
	     whilst the frame is being created.  We try the following
	     places in order:

	       1. The last argument register.
	       2. A slot on the stack above the frame.  (This only
		  works if the function is not a varargs function).
	       3. Register r3, after pushing the argument registers
		  onto the stack.

	     Note - we only need to tell the dwarf2 backend about the SP
	     adjustment in the second variant; the static chain register
	     doesn't need to be unwound, as it doesn't contain a value
	     inherited from the caller.  */

	  if (df_regs_ever_live_p (3) == false)
	    insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	  else if (args_to_push == 0)
	    {
	      rtx dwarf;

	      gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
	      saved_regs += 4;

	      insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
	      insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
	      fp_offset = 4;

	      /* Just tell the dwarf backend that we adjusted SP.  */
	      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				   plus_constant (stack_pointer_rtx,
						  -fp_offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
	    }
	  else
	    {
	      /* Store the args on the stack.  */
	      if (cfun->machine->uses_anonymous_args)
		insn = emit_multi_reg_push
		  ((0xf0 >> (args_to_push / 4)) & 0xf);
	      else
		insn = emit_insn
		  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (- args_to_push)));

	      RTX_FRAME_RELATED_P (insn) = 1;

	      saved_pretend_args = 1;
	      fp_offset = args_to_push;
	      args_to_push = 0;

	      /* Now reuse r3 to preserve IP.  */
	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
	    }
	}

      insn = emit_set_insn (ip_rtx,
			    plus_constant (stack_pointer_rtx, fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
	insn = emit_multi_reg_push
	  ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
	insn = emit_insn
	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
		       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating an extra push of
     IP (needed when a frame is needed and the frame layout is APCS),
     then subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));
    }
  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
	  && saved_regs == offsets->saved_regs - offsets->saved_args)
	{
	  /* If no coprocessor registers are being pushed and we don't have
	     to worry about a frame pointer then push extra registers to
	     create the stack frame.  This is done in a way that does not
	     alter the frame layout, so is independent of the epilogue.  */
	  int n;
	  int frame;
	  n = 0;
	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
	    n++;
	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
	  if (frame && n * 4 >= frame)
	    {
	      n = frame / 4;
	      live_regs_mask |= (1 << n) - 1;
	      saved_regs += frame;
	    }
	}
      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();
  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
	{
	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (IS_NESTED (func_type))
	    {
	      /* Recover the static chain register.  */
	      if (!df_regs_ever_live_p (3)
		  || saved_pretend_args)
		insn = gen_rtx_REG (SImode, 3);
	      else /* if (crtl->args.pretend_args_size == 0) */
		{
		  insn = plus_constant (hard_frame_pointer_rtx, 4);
		  insn = gen_frame_mem (SImode, insn);
		}
	      emit_set_insn (ip_rtx, insn);
	      /* Add a USE to stop propagate_one_insn() from barfing.  */
	      emit_insn (gen_prologue_use (ip_rtx));
	    }
	}
      else
	{
	  insn = GEN_INT (saved_regs - 4);
	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
					stack_pointer_rtx, insn));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
	 need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
			- offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
				    amount));
      do
	{
	  last = last ? NEXT_INSN (last) : get_insns ();
	  RTX_FRAME_RELATED_P (last) = 1;
	}
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
	 will prevent the scheduler from moving stores to the frame
	 before the stack adjustment.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					 hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
	mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
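/* Sketch of the classic APCS-frame prologue this expands to
   (illustrative):

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

   with additional pushes and a further stack adjustment when
   coprocessor registers or local variables are present.  */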
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}
      if (current_insn_predicate != NULL)
	{
	  output_operand_lossage
	    ("predicated instruction in conditional sequence");
	  return;
	}

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB)
	{
	  output_operand_lossage ("predicated Thumb instruction");
	  return;
	}

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   If CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
	arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
	{
	  fputc('s', stream);
	  arm_print_condition (stream);
	}
      else
	{
	  arm_print_condition (stream);
	  fputc('s', stream);
	}
      return;

    case '!':
      /* If the instruction is conditionally executed then print
	 the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
	arm_print_condition (stream);
      else
	fputc('s', stream);
      break;

    /* %# is a "break" sequence.  It doesn't output anything, but is used to
       separate e.g. operand numbers from following text, if that text consists
       of further digits which we don't want to be part of the operand
       number.  */
    case '#':
      return;

    case 'N':
      {
	REAL_VALUE_TYPE r;
	REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	r = REAL_VALUE_NEGATE (r);
	fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (stream, x);
	  break;

	default:
	  gcc_unreachable ();
	}
      return;

    case 'B':
      if (GET_CODE (x) == CONST_INT)
	{
	  HOST_WIDE_INT val;
	  val = ARM_SIGN_EXTEND (~INTVAL (x));
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	{
	  putc ('~', stream);
	  output_addr_const (stream, x);
	}
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    /* Truncate Cirrus shift counts.  */
    case 's':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
	  return;
	}
      arm_print_operand (stream, x, 0);
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
	HOST_WIDE_INT val;
	const char *shift;

	if (!shift_operator (x, SImode))
	  {
	    output_operand_lossage ("invalid shift operand");
	    break;
	  }

	shift = shift_op (x, &val);

	if (shift)
	  {
	    fprintf (stream, ", %s ", shift);
	    if (val == -1)
	      arm_print_operand (stream, XEXP (x, 1), 0);
	    else
	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
	  }
      }
      return;
13702 /* An explanation of the 'Q', 'R' and 'H' register operands:
13704 In a pair of registers containing a DI or DF value the 'Q'
13705 operand returns the register number of the register containing
13706 the least significant part of the value. The 'R' operand returns
13707 the register number of the register containing the most
13708 significant part of the value.
13710 The 'H' operand returns the higher of the two register numbers.
13711 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
13712 same as the 'Q' operand, since the most significant part of the
13713 value is held in the lower number register. The reverse is true
13714 on systems where WORDS_BIG_ENDIAN is false.
13716 The purpose of these operands is to distinguish between cases
13717 where the endian-ness of the values is important (for example
13718 when they are added together), and cases where the endian-ness
13719 is irrelevant, but the order of register operations is important.
13720 For example when loading a value from memory into a register
13721 pair, the endian-ness does not matter. Provided that the value
13722 from the lower memory address is put into the lower numbered
13723 register, and the value from the higher address is put into the
13724 higher numbered register, the load will work regardless of whether
13725 the value being loaded is big-wordian or little-wordian. The
13726 order of the two register loads can matter however, if the address
13727 of the memory location is actually held in one of the registers
13728 being overwritten by the load. */
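/* Example (WORDS_BIG_ENDIAN false, i.e. little-endian words): for a
   DImode value in the pair r0/r1, '%Q' prints "r0" (least significant
   word), '%R' prints "r1" (most significant word) and '%H' prints "r1"
   (the higher register number).  On a big-wordian run '%Q' and '%H'
   coincide, both printing "r1".  */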
13730 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13732 output_operand_lossage ("invalid operand for code '%c'", code);
13736 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
13740 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13742 output_operand_lossage ("invalid operand for code '%c'", code);
13746 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
13750 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13752 output_operand_lossage ("invalid operand for code '%c'", code);
13756 asm_fprintf (stream, "%r", REGNO (x) + 1);
13760 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13762 output_operand_lossage ("invalid operand for code '%c'", code);
13766 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
13770 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13772 output_operand_lossage ("invalid operand for code '%c'", code);
13776 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13780 asm_fprintf (stream, "%r",
13781 GET_CODE (XEXP (x, 0)) == REG
13782 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13786 asm_fprintf (stream, "{%r-%r}",
13788 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
13791 /* Like 'M', but writing doubleword vector registers, for use by Neon
insns. */
13795 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13796 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13798 asm_fprintf (stream, "{d%d}", regno);
13800 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13805 /* CONST_TRUE_RTX means always -- that's the default. */
13806 if (x == const_true_rtx)
13809 if (!COMPARISON_P (x))
13811 output_operand_lossage ("invalid operand for code '%c'", code);
13815 fputs (arm_condition_codes[get_arm_condition_code (x)],
13820 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13821 want to do that. */
13822 if (x == const_true_rtx)
13824 output_operand_lossage ("instruction never executed");
13827 if (!COMPARISON_P (x))
13829 output_operand_lossage ("invalid operand for code '%c'", code);
13833 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13834 (get_arm_condition_code (x))],
13838 /* Cirrus registers can be accessed in a variety of ways:
13839 single floating point (f)
13840 double floating point (d)
13842 64bit integer (dx). */
13843 case 'W': /* Cirrus register in F mode. */
13844 case 'X': /* Cirrus register in D mode. */
13845 case 'Y': /* Cirrus register in FX mode. */
13846 case 'Z': /* Cirrus register in DX mode. */
13847 gcc_assert (GET_CODE (x) == REG
13848 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13850 fprintf (stream, "mv%s%s",
13852 : code == 'X' ? "d"
13853 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13857 /* Print a Cirrus register, in the variant determined by the register's mode. */
13860 int mode = GET_MODE (x);
13862 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13864 output_operand_lossage ("invalid operand for code '%c'", code);
13868 fprintf (stream, "mv%s%s",
13869 mode == DFmode ? "d"
13870 : mode == SImode ? "fx"
13871 : mode == DImode ? "dx"
13872 : "f", reg_names[REGNO (x)] + 2);
13878 if (GET_CODE (x) != REG
13879 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13880 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13881 /* Bad value for wCG register number. */
13883 output_operand_lossage ("invalid operand for code '%c'", code);
13888 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13891 /* Print an iWMMXt control register name. */
13893 if (GET_CODE (x) != CONST_INT
13895 || INTVAL (x) >= 16)
13896 /* Bad value for wC register number. */
13898 output_operand_lossage ("invalid operand for code '%c'", code);
13904 static const char * wc_reg_names [16] =
13906 "wCID", "wCon", "wCSSF", "wCASF",
13907 "wC4", "wC5", "wC6", "wC7",
13908 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13909 "wC12", "wC13", "wC14", "wC15"
13912 fputs (wc_reg_names[INTVAL (x)], stream);
13916 /* Print a VFP/Neon double precision or quad precision register name. */
13920 int mode = GET_MODE (x);
13921 int is_quad = (code == 'q');
13924 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13926 output_operand_lossage ("invalid operand for code '%c'", code);
13930 if (GET_CODE (x) != REG
13931 || !IS_VFP_REGNUM (REGNO (x)))
13933 output_operand_lossage ("invalid operand for code '%c'", code);
13938 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13939 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13941 output_operand_lossage ("invalid operand for code '%c'", code);
13945 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13946 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13950 /* These two codes print the low/high doubleword register of a Neon quad
13951 register, respectively. For pair-structure types, they can also print
13952 low/high quadword registers. */
13956 int mode = GET_MODE (x);
13959 if ((GET_MODE_SIZE (mode) != 16
13960 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13962 output_operand_lossage ("invalid operand for code '%c'", code);
13967 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13969 output_operand_lossage ("invalid operand for code '%c'", code);
13973 if (GET_MODE_SIZE (mode) == 16)
13974 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13975 + (code == 'f' ? 1 : 0));
13977 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13978 + (code == 'f' ? 1 : 0));
13982 /* Print a VFPv3 floating-point constant, represented as an integer
index. */
13986 int index = vfp3_const_double_index (x);
13987 gcc_assert (index != -1);
13988 fprintf (stream, "%d", index);
13992 /* Print bits representing opcode features for Neon.
13994 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13995 and polynomials as unsigned.
13997 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13999 Bit 2 is 1 for rounding functions, 0 otherwise. */
14001 /* Identify the type as 's', 'u', 'p' or 'f'. */
14004 HOST_WIDE_INT bits = INTVAL (x);
14005 fputc ("uspf"[bits & 3], stream);
14009 /* Likewise, but signed and unsigned integers are both 'i'. */
14012 HOST_WIDE_INT bits = INTVAL (x);
14013 fputc ("iipf"[bits & 3], stream);
14017 /* As for 'T', but emit 'u' instead of 'p'. */
14020 HOST_WIDE_INT bits = INTVAL (x);
14021 fputc ("usuf"[bits & 3], stream);
14025 /* Bit 2: rounding (vs none). */
14028 HOST_WIDE_INT bits = INTVAL (x);
14029 fputs ((bits & 4) != 0 ? "r" : "", stream);
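/* A worked example of the feature bits: INTVAL (x) == 6 is binary 110,
   i.e. bit 0 clear (unsigned), bit 1 set (polynomial) and bit 2 set
   (rounding), so '%T' prints 'p', '%F' prints 'p', '%t' prints 'u' and
   '%O' prints "r".  */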
14033 /* Memory operand for vld1/vst1 instruction. */
14037 bool postinc = FALSE;
14038 gcc_assert (GET_CODE (x) == MEM);
14039 addr = XEXP (x, 0);
14040 if (GET_CODE (addr) == POST_INC)
14043 addr = XEXP (addr, 0);
14045 asm_fprintf (stream, "[%r]", REGNO (addr));
14047 fputs ("!", stream);
14051 /* Register specifier for vld1.16/vst1.16. Translate the S register
14052 number into a D register number and element index. */
14055 int mode = GET_MODE (x);
14058 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
14060 output_operand_lossage ("invalid operand for code '%c'", code);
14065 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
14067 output_operand_lossage ("invalid operand for code '%c'", code);
14071 regno = regno - FIRST_VFP_REGNUM;
14072 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
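/* For example, single register s5 becomes "d2[2]": 5/2 selects d2, and
   the odd S register is the upper half of the D register, i.e. lane 2
   when using 16-bit elements.  */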
14079 output_operand_lossage ("missing operand");
14083 switch (GET_CODE (x))
14086 asm_fprintf (stream, "%r", REGNO (x));
14090 output_memory_reference_mode = GET_MODE (x);
14091 output_address (XEXP (x, 0));
14098 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
14099 sizeof (fpstr), 0, 1);
14100 fprintf (stream, "#%s", fpstr);
14103 fprintf (stream, "#%s", fp_immediate_constant (x));
14107 gcc_assert (GET_CODE (x) != NEG);
14108 fputc ('#', stream);
14109 if (GET_CODE (x) == HIGH)
14111 fputs (":lower16:", stream);
14115 output_addr_const (stream, x);
14121 /* Target hook for assembling integer objects. The ARM version needs to
14122 handle word-sized values specially. */
14124 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
14126 enum machine_mode mode;
14128 if (size == UNITS_PER_WORD && aligned_p)
14130 fputs ("\t.word\t", asm_out_file);
14131 output_addr_const (asm_out_file, x);
14133 /* Mark symbols as position independent. We only do this in the
14134 .text segment, not in the .data segment. */
14135 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
14136 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
14138 /* See legitimize_pic_address for an explanation of the
14139 TARGET_VXWORKS_RTP check. */
14140 if (TARGET_VXWORKS_RTP
14141 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
14142 fputs ("(GOT)", asm_out_file);
14144 fputs ("(GOTOFF)", asm_out_file);
14146 fputc ('\n', asm_out_file);
14150 mode = GET_MODE (x);
14152 if (arm_vector_mode_supported_p (mode))
14156 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14158 units = CONST_VECTOR_NUNITS (x);
14159 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
14161 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14162 for (i = 0; i < units; i++)
14164 rtx elt = CONST_VECTOR_ELT (x, i);
14166 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
14169 for (i = 0; i < units; i++)
14171 rtx elt = CONST_VECTOR_ELT (x, i);
14172 REAL_VALUE_TYPE rval;
14174 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
14177 (rval, GET_MODE_INNER (mode),
14178 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
14184 return default_assemble_integer (x, size, aligned_p);
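/* For instance, a PIC constant-table reference to a non-local symbol
   "foo" is emitted by the code above as "\t.word\tfoo(GOT)", while a
   local symbol gets the cheaper "\t.word\tfoo(GOTOFF)" form (in the
   non-VxWorks-RTP case).  */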
14188 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
14192 if (!TARGET_AAPCS_BASED)
14195 default_named_section_asm_out_constructor
14196 : default_named_section_asm_out_destructor) (symbol, priority);
14200 /* Put these in the .init_array section, using a special relocation. */
14201 if (priority != DEFAULT_INIT_PRIORITY)
14204 sprintf (buf, "%s.%.5u",
14205 is_ctor ? ".init_array" : ".fini_array",
14207 s = get_section (buf, SECTION_WRITE, NULL_TREE);
14214 switch_to_section (s);
14215 assemble_align (POINTER_SIZE);
14216 fputs ("\t.word\t", asm_out_file);
14217 output_addr_const (asm_out_file, symbol);
14218 fputs ("(target1)\n", asm_out_file);
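/* As a sketch, a constructor "ctor_fn" (a hypothetical symbol) with
   priority 65 lands in section ".init_array.00065" and is emitted as

	.word	ctor_fn(target1)

   where the (target1) relocation is resolved according to the target's
   R_ARM_TARGET1 convention.  */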
14221 /* Add a function to the list of static constructors. */
14224 arm_elf_asm_constructor (rtx symbol, int priority)
14226 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
14229 /* Add a function to the list of static destructors. */
14232 arm_elf_asm_destructor (rtx symbol, int priority)
14234 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
14237 /* A finite state machine takes care of noticing whether or not instructions
14238 can be conditionally executed, decreasing execution time and code
14239 size by deleting branch instructions. The fsm is controlled by
14240 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
14242 /* The states of the fsm controlling condition codes are:
14243 0: normal, do nothing special
14244 1: make ASM_OUTPUT_OPCODE not output this instruction
14245 2: make ASM_OUTPUT_OPCODE not output this instruction
14246 3: make instructions conditional
14247 4: make instructions conditional
14249 State transitions (state->state by whom under condition):
14250 0 -> 1 final_prescan_insn if the `target' is a label
14251 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
14252 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
14253 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
14254 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
14255 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
14256 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
14257 (the target insn is arm_target_insn).
14259 If the jump clobbers the conditions then we use states 2 and 4.
14261 A similar thing can be done with conditional return insns.
14263 XXX In case the `target' is an unconditional branch, this conditionalising
14264 of the instructions always reduces code size, but not always execution
14265 time. But then, I want to reduce the code size to somewhere near what
14266 /bin/cc produces. */
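/* A sketch of the transformation the fsm performs (registers are
   illustrative):

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		==>	addne	r1, r1, #1
	add	r1, r1, #1
   .L1:

   The branch is deleted and the skipped instruction executes under the
   inverse condition.  */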
14268 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
14269 instructions. When a COND_EXEC instruction is seen the subsequent
14270 instructions are scanned so that multiple conditional instructions can be
14271 combined into a single IT block. arm_condexec_count and arm_condexec_mask
14272 specify the length and true/false mask for the IT block. These will be
14273 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
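/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == 5 (binary 101), the IT block is "itet eq":
   the first and third instructions execute when EQ holds and the second
   when it does not.  */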
14275 /* Returns the index of the ARM condition code string in
14276 `arm_condition_codes'. COMPARISON should be an rtx like
14277 `(eq (...) (...))'. */
14278 static enum arm_cond_code
14279 get_arm_condition_code (rtx comparison)
14281 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
14282 enum arm_cond_code code;
14283 enum rtx_code comp_code = GET_CODE (comparison);
14285 if (GET_MODE_CLASS (mode) != MODE_CC)
14286 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
14287 XEXP (comparison, 1));
14291 case CC_DNEmode: code = ARM_NE; goto dominance;
14292 case CC_DEQmode: code = ARM_EQ; goto dominance;
14293 case CC_DGEmode: code = ARM_GE; goto dominance;
14294 case CC_DGTmode: code = ARM_GT; goto dominance;
14295 case CC_DLEmode: code = ARM_LE; goto dominance;
14296 case CC_DLTmode: code = ARM_LT; goto dominance;
14297 case CC_DGEUmode: code = ARM_CS; goto dominance;
14298 case CC_DGTUmode: code = ARM_HI; goto dominance;
14299 case CC_DLEUmode: code = ARM_LS; goto dominance;
14300 case CC_DLTUmode: code = ARM_CC;
dominance:
14303 gcc_assert (comp_code == EQ || comp_code == NE);
14305 if (comp_code == EQ)
14306 return ARM_INVERSE_CONDITION_CODE (code);
14312 case NE: return ARM_NE;
14313 case EQ: return ARM_EQ;
14314 case GE: return ARM_PL;
14315 case LT: return ARM_MI;
14316 default: gcc_unreachable ();
14322 case NE: return ARM_NE;
14323 case EQ: return ARM_EQ;
14324 default: gcc_unreachable ();
14330 case NE: return ARM_MI;
14331 case EQ: return ARM_PL;
14332 default: gcc_unreachable ();
14337 /* These encodings assume that AC=1 in the FPA system control
14338 byte. This allows us to handle all cases except UNEQ and
LTGT. */
14342 case GE: return ARM_GE;
14343 case GT: return ARM_GT;
14344 case LE: return ARM_LS;
14345 case LT: return ARM_MI;
14346 case NE: return ARM_NE;
14347 case EQ: return ARM_EQ;
14348 case ORDERED: return ARM_VC;
14349 case UNORDERED: return ARM_VS;
14350 case UNLT: return ARM_LT;
14351 case UNLE: return ARM_LE;
14352 case UNGT: return ARM_HI;
14353 case UNGE: return ARM_PL;
14354 /* UNEQ and LTGT do not have a representation. */
14355 case UNEQ: /* Fall through. */
14356 case LTGT: /* Fall through. */
14357 default: gcc_unreachable ();
14363 case NE: return ARM_NE;
14364 case EQ: return ARM_EQ;
14365 case GE: return ARM_LE;
14366 case GT: return ARM_LT;
14367 case LE: return ARM_GE;
14368 case LT: return ARM_GT;
14369 case GEU: return ARM_LS;
14370 case GTU: return ARM_CC;
14371 case LEU: return ARM_CS;
14372 case LTU: return ARM_HI;
14373 default: gcc_unreachable ();
14379 case LTU: return ARM_CS;
14380 case GEU: return ARM_CC;
14381 default: gcc_unreachable ();
14387 case NE: return ARM_NE;
14388 case EQ: return ARM_EQ;
14389 case GE: return ARM_GE;
14390 case GT: return ARM_GT;
14391 case LE: return ARM_LE;
14392 case LT: return ARM_LT;
14393 case GEU: return ARM_CS;
14394 case GTU: return ARM_HI;
14395 case LEU: return ARM_LS;
14396 case LTU: return ARM_CC;
14397 default: gcc_unreachable ();
14400 default: gcc_unreachable ();
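/* E.g. in CC_SWPmode the comparison operands were swapped when the flags
   were set, so a GT test must be printed as the LT condition, as handled
   above.  */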
14404 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
14407 thumb2_final_prescan_insn (rtx insn)
14409 rtx first_insn = insn;
14410 rtx body = PATTERN (insn);
14412 enum arm_cond_code code;
14416 /* Remove the previous insn from the count of insns to be output. */
14417 if (arm_condexec_count)
14418 arm_condexec_count--;
14420 /* Nothing to do if we are already inside a conditional block. */
14421 if (arm_condexec_count)
14424 if (GET_CODE (body) != COND_EXEC)
14427 /* Conditional jumps are implemented directly. */
14428 if (GET_CODE (insn) == JUMP_INSN)
14431 predicate = COND_EXEC_TEST (body);
14432 arm_current_cc = get_arm_condition_code (predicate);
14434 n = get_attr_ce_count (insn);
14435 arm_condexec_count = 1;
14436 arm_condexec_mask = (1 << n) - 1;
14437 arm_condexec_masklen = n;
14438 /* See if subsequent instructions can be combined into the same block. */
14441 insn = next_nonnote_insn (insn);
14443 /* Jumping into the middle of an IT block is illegal, so a label or
14444 barrier terminates the block. */
14445 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
14448 body = PATTERN (insn);
14449 /* USE and CLOBBER aren't really insns, so just skip them. */
14450 if (GET_CODE (body) == USE
14451 || GET_CODE (body) == CLOBBER)
14454 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
14455 if (GET_CODE (body) != COND_EXEC)
14457 /* Allow up to 4 conditionally executed instructions in a block. */
14458 n = get_attr_ce_count (insn);
14459 if (arm_condexec_masklen + n > 4)
14462 predicate = COND_EXEC_TEST (body);
14463 code = get_arm_condition_code (predicate);
14464 mask = (1 << n) - 1;
14465 if (arm_current_cc == code)
14466 arm_condexec_mask |= (mask << arm_condexec_masklen);
14467 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
14470 arm_condexec_count++;
14471 arm_condexec_masklen += n;
14473 /* A jump must be the last instruction in a conditional block. */
14474 if (GET_CODE (insn) == JUMP_INSN)
14477 /* Restore recog_data (getting the attributes of other insns can
14478 destroy this array, but final.c assumes that it remains intact
14479 across this call). */
14480 extract_constrain_insn_cached (first_insn);
14484 arm_final_prescan_insn (rtx insn)
14486 /* BODY will hold the body of INSN. */
14487 rtx body = PATTERN (insn);
14489 /* This will be 1 if trying to repeat the trick, and things need to be
14490 reversed if it appears to fail. */
14493 /* JUMP_CLOBBERS will be one if the condition codes are clobbered when a
14494 branch is taken, even if the rtl suggests otherwise. It also
14495 means that we have to grub around within the jump expression to find
14496 out what the conditions are when the jump isn't taken. */
14497 int jump_clobbers = 0;
14499 /* If we start with a return insn, we only succeed if we find another one. */
14500 int seeking_return = 0;
14502 /* START_INSN will hold the insn from where we start looking. This is the
14503 first insn after the following code_label if REVERSE is true. */
14504 rtx start_insn = insn;
14506 /* If in state 4, check if the target branch is reached, in order to
14507 change back to state 0. */
14508 if (arm_ccfsm_state == 4)
14510 if (insn == arm_target_insn)
14512 arm_target_insn = NULL;
14513 arm_ccfsm_state = 0;
14518 /* If in state 3, it is possible to repeat the trick, if this insn is an
14519 unconditional branch to a label, and immediately following this branch
14520 is the previous target label which is only used once, and the label this
14521 branch jumps to is not too far off. */
14522 if (arm_ccfsm_state == 3)
14524 if (simplejump_p (insn))
14526 start_insn = next_nonnote_insn (start_insn);
14527 if (GET_CODE (start_insn) == BARRIER)
14529 /* XXX Isn't this always a barrier? */
14530 start_insn = next_nonnote_insn (start_insn);
14532 if (GET_CODE (start_insn) == CODE_LABEL
14533 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14534 && LABEL_NUSES (start_insn) == 1)
14539 else if (GET_CODE (body) == RETURN)
14541 start_insn = next_nonnote_insn (start_insn);
14542 if (GET_CODE (start_insn) == BARRIER)
14543 start_insn = next_nonnote_insn (start_insn);
14544 if (GET_CODE (start_insn) == CODE_LABEL
14545 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14546 && LABEL_NUSES (start_insn) == 1)
14549 seeking_return = 1;
14558 gcc_assert (!arm_ccfsm_state || reverse);
14559 if (GET_CODE (insn) != JUMP_INSN)
14562 /* This jump might be paralleled with a clobber of the condition codes;
14563 the jump should always come first. */
14564 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
14565 body = XVECEXP (body, 0, 0);
14568 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
14569 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
14572 int fail = FALSE, succeed = FALSE;
14573 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
14574 int then_not_else = TRUE;
14575 rtx this_insn = start_insn, label = 0;
14577 /* If the jump cannot be done with one instruction, we cannot
14578 conditionally execute the instruction in the inverse case. */
14579 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
14585 /* Register the insn jumped to. */
14588 if (!seeking_return)
14589 label = XEXP (SET_SRC (body), 0);
14591 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
14592 label = XEXP (XEXP (SET_SRC (body), 1), 0);
14593 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
14595 label = XEXP (XEXP (SET_SRC (body), 2), 0);
14596 then_not_else = FALSE;
14598 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
14599 seeking_return = 1;
14600 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
14602 seeking_return = 1;
14603 then_not_else = FALSE;
14606 gcc_unreachable ();
14608 /* See how many insns this branch skips, and what kind of insns. If all
14609 insns are okay, and the label or unconditional branch to the same
14610 label is not too far away, succeed. */
14611 for (insns_skipped = 0;
14612 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
14616 this_insn = next_nonnote_insn (this_insn);
14620 switch (GET_CODE (this_insn))
14623 /* Succeed if it is the target label, otherwise fail since
14624 control falls in from somewhere else. */
14625 if (this_insn == label)
14629 arm_ccfsm_state = 2;
14630 this_insn = next_nonnote_insn (this_insn);
14633 arm_ccfsm_state = 1;
14641 /* Succeed if the following insn is the target label.
14643 If return insns are used then the last insn in a function
14644 will be a barrier. */
14645 this_insn = next_nonnote_insn (this_insn);
14646 if (this_insn && this_insn == label)
14650 arm_ccfsm_state = 2;
14651 this_insn = next_nonnote_insn (this_insn);
14654 arm_ccfsm_state = 1;
14662 /* The AAPCS says that conditional calls should not be
14663 used since they make interworking inefficient (the
14664 linker can't transform BL<cond> into BLX). That's
14665 only a problem if the machine has BLX. */
14672 /* Succeed if the following insn is the target label, or
14673 if the following two insns are a barrier and the
target label. */
14675 this_insn = next_nonnote_insn (this_insn);
14676 if (this_insn && GET_CODE (this_insn) == BARRIER)
14677 this_insn = next_nonnote_insn (this_insn);
14679 if (this_insn && this_insn == label
14680 && insns_skipped < max_insns_skipped)
14684 arm_ccfsm_state = 2;
14685 this_insn = next_nonnote_insn (this_insn);
14688 arm_ccfsm_state = 1;
14696 /* If this is an unconditional branch to the same label, succeed.
14697 If it is to another label, do nothing. If it is conditional,
fail. */
14699 /* XXX Probably, the tests for SET and the PC are
unnecessary. */
14702 scanbody = PATTERN (this_insn);
14703 if (GET_CODE (scanbody) == SET
14704 && GET_CODE (SET_DEST (scanbody)) == PC)
14706 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
14707 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
14709 arm_ccfsm_state = 2;
14712 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
14715 /* Fail if a conditional return is undesirable (e.g. on a
14716 StrongARM), but still allow this if optimizing for size. */
14717 else if (GET_CODE (scanbody) == RETURN
14718 && !use_return_insn (TRUE, NULL)
14721 else if (GET_CODE (scanbody) == RETURN
14724 arm_ccfsm_state = 2;
14727 else if (GET_CODE (scanbody) == PARALLEL)
14729 switch (get_attr_conds (this_insn))
14739 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
14744 /* Instructions using or affecting the condition codes make it
fail. */
14746 scanbody = PATTERN (this_insn);
14747 if (!(GET_CODE (scanbody) == SET
14748 || GET_CODE (scanbody) == PARALLEL)
14749 || get_attr_conds (this_insn) != CONDS_NOCOND)
14752 /* A conditional Cirrus instruction must be followed by
14753 a non-Cirrus instruction. However, since we
14754 conditionalize instructions in this function and by
14755 the time we get here we can't add instructions
14756 (nops), because shorten_branches() has already been
14757 called, we will disable conditionalizing Cirrus
14758 instructions to be safe. */
14759 if (GET_CODE (scanbody) != USE
14760 && GET_CODE (scanbody) != CLOBBER
14761 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
14771 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
14772 arm_target_label = CODE_LABEL_NUMBER (label);
14775 gcc_assert (seeking_return || arm_ccfsm_state == 2);
14777 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
14779 this_insn = next_nonnote_insn (this_insn);
14780 gcc_assert (!this_insn
14781 || (GET_CODE (this_insn) != BARRIER
14782 && GET_CODE (this_insn) != CODE_LABEL));
14786 /* Oh, dear! We ran off the end. Give up. */
14787 extract_constrain_insn_cached (insn);
14788 arm_ccfsm_state = 0;
14789 arm_target_insn = NULL;
14792 arm_target_insn = this_insn;
14796 gcc_assert (!reverse);
14798 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
14800 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
14801 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14802 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
14803 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14807 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
what it was. */
14810 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
14814 if (reverse || then_not_else)
14815 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14818 /* Restore recog_data (getting the attributes of other insns can
14819 destroy this array, but final.c assumes that it remains intact
14820 across this call). */
14821 extract_constrain_insn_cached (insn);
14825 /* Output IT instructions. */
14827 thumb2_asm_output_opcode (FILE * stream)
14832 if (arm_condexec_mask)
14834 for (n = 0; n < arm_condexec_masklen; n++)
14835 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14837 asm_fprintf (stream, "i%s\t%s\n\t", buff,
14838 arm_condition_codes[arm_current_cc]);
14839 arm_condexec_mask = 0;
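/* For instance, a two-instruction block with mask 3 (binary 11) under EQ
   prints

	itt	eq
	addeq	r0, r0, #1
	subeq	r1, r1, #2

   (the conditional instructions shown are illustrative).  */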
14843 /* Returns true if REGNO is a valid register
14844 for holding a quantity of type MODE. */
14846 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14848 if (GET_MODE_CLASS (mode) == MODE_CC)
14849 return (regno == CC_REGNUM
14850 || (TARGET_HARD_FLOAT && TARGET_VFP
14851 && regno == VFPCC_REGNUM));
14854 /* For the Thumb we only allow values bigger than SImode in
14855 registers 0 - 6, so that there is always a second low
14856 register available to hold the upper part of the value.
14857 We probably ought to ensure that the register is the
14858 start of an even numbered register pair. */
14859 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
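/* E.g. a DImode value may start at r6 (occupying r6/r7) but not at r7,
   where its upper word would spill into a high register.  */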
14861 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14862 && IS_CIRRUS_REGNUM (regno))
14863 /* We have outlawed SI values in Cirrus registers because they
14864 reside in the lower 32 bits, but SF values reside in the
14865 upper 32 bits. This causes gcc all sorts of grief. We can't
14866 even split the registers into pairs because Cirrus SI values
14867 get sign extended to 64 bits -- aldyh. */
14868 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14870 if (TARGET_HARD_FLOAT && TARGET_VFP
14871 && IS_VFP_REGNUM (regno))
14873 if (mode == SFmode || mode == SImode)
14874 return VFP_REGNO_OK_FOR_SINGLE (regno);
14876 if (mode == DFmode)
14877 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14879 /* VFP registers can hold HFmode values, but there is no point in
14880 putting them there unless we have the NEON extensions for
14881 loading/storing them, too. */
14882 if (mode == HFmode)
14883 return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
14886 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14887 || (VALID_NEON_QREG_MODE (mode)
14888 && NEON_REGNO_OK_FOR_QUAD (regno))
14889 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14890 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14891 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14892 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14893 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14898 if (TARGET_REALLY_IWMMXT)
14900 if (IS_IWMMXT_GR_REGNUM (regno))
14901 return mode == SImode;
14903 if (IS_IWMMXT_REGNUM (regno))
14904 return VALID_IWMMXT_REG_MODE (mode);
14907 /* We allow almost any value to be stored in the general registers.
14908 Restrict doubleword quantities to even register pairs so that we can
14909 use ldrd. Do not allow very large Neon structure opaque modes in
14910 general registers; they would use too many. */
14911 if (regno <= LAST_ARM_REGNUM)
14912 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14913 && ARM_NUM_REGS (mode) <= 4;
14915 if (regno == FRAME_POINTER_REGNUM
14916 || regno == ARG_POINTER_REGNUM)
14917 /* We only allow integers in the fake hard registers. */
14918 return GET_MODE_CLASS (mode) == MODE_INT;
14920 /* The only registers left are the FPA registers
14921 which we only allow to hold FP values. */
14922 return (TARGET_HARD_FLOAT && TARGET_FPA
14923 && GET_MODE_CLASS (mode) == MODE_FLOAT
14924 && regno >= FIRST_FPA_REGNUM
14925 && regno <= LAST_FPA_REGNUM);
14928 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14929 not used in arm mode. */
14932 arm_regno_class (int regno)
14936 if (regno == STACK_POINTER_REGNUM)
14938 if (regno == CC_REGNUM)
14945 if (TARGET_THUMB2 && regno < 8)
14948 if ( regno <= LAST_ARM_REGNUM
14949 || regno == FRAME_POINTER_REGNUM
14950 || regno == ARG_POINTER_REGNUM)
14951 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14953 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14954 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14956 if (IS_CIRRUS_REGNUM (regno))
14957 return CIRRUS_REGS;
14959 if (IS_VFP_REGNUM (regno))
14961 if (regno <= D7_VFP_REGNUM)
14962 return VFP_D0_D7_REGS;
14963 else if (regno <= LAST_LO_VFP_REGNUM)
14964 return VFP_LO_REGS;
14966 return VFP_HI_REGS;
14969 if (IS_IWMMXT_REGNUM (regno))
14970 return IWMMXT_REGS;
14972 if (IS_IWMMXT_GR_REGNUM (regno))
14973 return IWMMXT_GR_REGS;
14978 /* Handle a special case when computing the offset
14979 of an argument from the frame pointer. */
14981 arm_debugger_arg_offset (int value, rtx addr)
14985 /* We are only interested if dbxout_parms() failed to compute the offset. */
14989 /* We can only cope with the case where the address is held in a register. */
14990 if (GET_CODE (addr) != REG)
14993 /* If we are using the frame pointer to point at the argument, then
14994 an offset of 0 is correct. */
14995 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14998 /* If we are using the stack pointer to point at the
14999 argument, then an offset of 0 is correct. */
15000 /* ??? Check this is consistent with thumb2 frame layout. */
15001 if ((TARGET_THUMB || !frame_pointer_needed)
15002 && REGNO (addr) == SP_REGNUM)
15005 /* Oh dear. The argument is pointed to by a register rather
15006 than being held in a register, or being stored at a known
15007 offset from the frame pointer. Since GDB only understands
15008 those two kinds of argument we must translate the address
15009 held in the register into an offset from the frame pointer.
15010 We do this by searching through the insns for the function
15011 looking to see where this register gets its value. If the
15012 register is initialized from the frame pointer plus an offset
15013 then we are in luck and we can continue, otherwise we give up.
15015 This code is exercised by producing debugging information
15016 for a function with arguments like this:
15018 double func (double a, double b, int c, double d) {return d;}
15020 Without this code the stab for parameter 'd' will be set to
15021 an offset of 0 from the frame pointer, rather than 8. */
15023 /* The if() statement says:
15025 If the insn is a normal instruction
15026 and if the insn is setting the value in a register
15027 and if the register being set is the register holding the address of the argument
15028 and if the address is computed by an addition
15029 that involves adding to a register
15030 which is the frame pointer
15035 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15037 if ( GET_CODE (insn) == INSN
15038 && GET_CODE (PATTERN (insn)) == SET
15039 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
15040 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
15041 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
15042 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
15043 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
15046 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
15055 warning (0, "unable to compute real location of stacked parameter");
15056 value = 8; /* XXX magic hack */
15062 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
15065 if ((MASK) & insn_flags) \
15066 add_builtin_function ((NAME), (TYPE), (CODE), \
15067 BUILT_IN_MD, NULL, NULL_TREE); \
15071 struct builtin_description
15073 const unsigned int mask;
15074 const enum insn_code icode;
15075 const char * const name;
15076 const enum arm_builtins code;
15077 const enum rtx_code comparison;
15078 const unsigned int flag;
15081 static const struct builtin_description bdesc_2arg[] =
15083 #define IWMMXT_BUILTIN(code, string, builtin) \
15084 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
15085 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
15087 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
15088 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
15089 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
15090 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
15091 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
15092 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
15093 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
15094 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
15095 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
15096 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
15097 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
15098 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
15099 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
15100 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
15101 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
15102 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
15103 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
15104 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
15105 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
15106 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
15107 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
15108 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
15109 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
15110 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
15111 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
15112 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
15113 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
15114 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
15115 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
15116 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
15117 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
15118 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
15119 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
15120 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
15121 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
15122 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
15123 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
15124 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
15125 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
15126 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
15127 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
15128 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
15129 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
15130 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
15131 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
15132 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
15133 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
15134 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
15135 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
15136 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
15137 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
15138 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
15139 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
15140 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
15141 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
15142 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
15143 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
15144 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
15146 #define IWMMXT_BUILTIN2(code, builtin) \
15147 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
15149 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
15150 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
15151 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
15152 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
15153 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
15154 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
15155 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
15156 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
15157 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
15158 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
15159 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
15160 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
15161 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
15162 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
15163 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
15164 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
15165 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
15166 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
15167 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
15168 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
15169 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
15170 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
15171 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
15172 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
15173 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
15174 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
15175 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
15176 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
15177 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
15178 IWMMXT_BUILTIN2 (rordi3, WRORDI)
15179 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
15180 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
15183 static const struct builtin_description bdesc_1arg[] =
15185 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
15186 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
15187 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
15188 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
15189 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
15190 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
15191 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
15192 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
15193 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
15194 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
15195 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
15196 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
15197 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
15198 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
15199 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
15200 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
15201 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
15202 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
15205 /* Set up all the iWMMXt builtins. This is
15206 not called if TARGET_IWMMXT is zero. */
15209 arm_init_iwmmxt_builtins (void)
15211 const struct builtin_description * d;
15213 tree endlink = void_list_node;
15215 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15216 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15217 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15220 = build_function_type (integer_type_node,
15221 tree_cons (NULL_TREE, integer_type_node, endlink));
15222 tree v8qi_ftype_v8qi_v8qi_int
15223 = build_function_type (V8QI_type_node,
15224 tree_cons (NULL_TREE, V8QI_type_node,
15225 tree_cons (NULL_TREE, V8QI_type_node,
15226 tree_cons (NULL_TREE,
15229 tree v4hi_ftype_v4hi_int
15230 = build_function_type (V4HI_type_node,
15231 tree_cons (NULL_TREE, V4HI_type_node,
15232 tree_cons (NULL_TREE, integer_type_node,
15234 tree v2si_ftype_v2si_int
15235 = build_function_type (V2SI_type_node,
15236 tree_cons (NULL_TREE, V2SI_type_node,
15237 tree_cons (NULL_TREE, integer_type_node,
15239 tree v2si_ftype_di_di
15240 = build_function_type (V2SI_type_node,
15241 tree_cons (NULL_TREE, long_long_integer_type_node,
15242 tree_cons (NULL_TREE, long_long_integer_type_node,
15244 tree di_ftype_di_int
15245 = build_function_type (long_long_integer_type_node,
15246 tree_cons (NULL_TREE, long_long_integer_type_node,
15247 tree_cons (NULL_TREE, integer_type_node,
15249 tree di_ftype_di_int_int
15250 = build_function_type (long_long_integer_type_node,
15251 tree_cons (NULL_TREE, long_long_integer_type_node,
15252 tree_cons (NULL_TREE, integer_type_node,
15253 tree_cons (NULL_TREE,
15256 tree int_ftype_v8qi
15257 = build_function_type (integer_type_node,
15258 tree_cons (NULL_TREE, V8QI_type_node,
15260 tree int_ftype_v4hi
15261 = build_function_type (integer_type_node,
15262 tree_cons (NULL_TREE, V4HI_type_node,
15264 tree int_ftype_v2si
15265 = build_function_type (integer_type_node,
15266 tree_cons (NULL_TREE, V2SI_type_node,
15268 tree int_ftype_v8qi_int
15269 = build_function_type (integer_type_node,
15270 tree_cons (NULL_TREE, V8QI_type_node,
15271 tree_cons (NULL_TREE, integer_type_node,
15273 tree int_ftype_v4hi_int
15274 = build_function_type (integer_type_node,
15275 tree_cons (NULL_TREE, V4HI_type_node,
15276 tree_cons (NULL_TREE, integer_type_node,
15278 tree int_ftype_v2si_int
15279 = build_function_type (integer_type_node,
15280 tree_cons (NULL_TREE, V2SI_type_node,
15281 tree_cons (NULL_TREE, integer_type_node,
15283 tree v8qi_ftype_v8qi_int_int
15284 = build_function_type (V8QI_type_node,
15285 tree_cons (NULL_TREE, V8QI_type_node,
15286 tree_cons (NULL_TREE, integer_type_node,
15287 tree_cons (NULL_TREE,
15290 tree v4hi_ftype_v4hi_int_int
15291 = build_function_type (V4HI_type_node,
15292 tree_cons (NULL_TREE, V4HI_type_node,
15293 tree_cons (NULL_TREE, integer_type_node,
15294 tree_cons (NULL_TREE,
15297 tree v2si_ftype_v2si_int_int
15298 = build_function_type (V2SI_type_node,
15299 tree_cons (NULL_TREE, V2SI_type_node,
15300 tree_cons (NULL_TREE, integer_type_node,
15301 tree_cons (NULL_TREE,
15304 /* Miscellaneous. */
15305 tree v8qi_ftype_v4hi_v4hi
15306 = build_function_type (V8QI_type_node,
15307 tree_cons (NULL_TREE, V4HI_type_node,
15308 tree_cons (NULL_TREE, V4HI_type_node,
15310 tree v4hi_ftype_v2si_v2si
15311 = build_function_type (V4HI_type_node,
15312 tree_cons (NULL_TREE, V2SI_type_node,
15313 tree_cons (NULL_TREE, V2SI_type_node,
15315 tree v2si_ftype_v4hi_v4hi
15316 = build_function_type (V2SI_type_node,
15317 tree_cons (NULL_TREE, V4HI_type_node,
15318 tree_cons (NULL_TREE, V4HI_type_node,
15320 tree v2si_ftype_v8qi_v8qi
15321 = build_function_type (V2SI_type_node,
15322 tree_cons (NULL_TREE, V8QI_type_node,
15323 tree_cons (NULL_TREE, V8QI_type_node,
15325 tree v4hi_ftype_v4hi_di
15326 = build_function_type (V4HI_type_node,
15327 tree_cons (NULL_TREE, V4HI_type_node,
15328 tree_cons (NULL_TREE,
15329 long_long_integer_type_node,
15331 tree v2si_ftype_v2si_di
15332 = build_function_type (V2SI_type_node,
15333 tree_cons (NULL_TREE, V2SI_type_node,
15334 tree_cons (NULL_TREE,
15335 long_long_integer_type_node,
15337 tree void_ftype_int_int
15338 = build_function_type (void_type_node,
15339 tree_cons (NULL_TREE, integer_type_node,
15340 tree_cons (NULL_TREE, integer_type_node,
15343 = build_function_type (long_long_unsigned_type_node, endlink);
15345 = build_function_type (long_long_integer_type_node,
15346 tree_cons (NULL_TREE, V8QI_type_node,
15349 = build_function_type (long_long_integer_type_node,
15350 tree_cons (NULL_TREE, V4HI_type_node,
15353 = build_function_type (long_long_integer_type_node,
15354 tree_cons (NULL_TREE, V2SI_type_node,
15356 tree v2si_ftype_v4hi
15357 = build_function_type (V2SI_type_node,
15358 tree_cons (NULL_TREE, V4HI_type_node,
15360 tree v4hi_ftype_v8qi
15361 = build_function_type (V4HI_type_node,
15362 tree_cons (NULL_TREE, V8QI_type_node,
15365 tree di_ftype_di_v4hi_v4hi
15366 = build_function_type (long_long_unsigned_type_node,
15367 tree_cons (NULL_TREE,
15368 long_long_unsigned_type_node,
15369 tree_cons (NULL_TREE, V4HI_type_node,
15370 tree_cons (NULL_TREE,
15374 tree di_ftype_v4hi_v4hi
15375 = build_function_type (long_long_unsigned_type_node,
15376 tree_cons (NULL_TREE, V4HI_type_node,
15377 tree_cons (NULL_TREE, V4HI_type_node,
15380 /* Normal vector binops. */
15381 tree v8qi_ftype_v8qi_v8qi
15382 = build_function_type (V8QI_type_node,
15383 tree_cons (NULL_TREE, V8QI_type_node,
15384 tree_cons (NULL_TREE, V8QI_type_node,
15386 tree v4hi_ftype_v4hi_v4hi
15387 = build_function_type (V4HI_type_node,
15388 tree_cons (NULL_TREE, V4HI_type_node,
15389 tree_cons (NULL_TREE, V4HI_type_node,
15391 tree v2si_ftype_v2si_v2si
15392 = build_function_type (V2SI_type_node,
15393 tree_cons (NULL_TREE, V2SI_type_node,
15394 tree_cons (NULL_TREE, V2SI_type_node,
15396 tree di_ftype_di_di
15397 = build_function_type (long_long_unsigned_type_node,
15398 tree_cons (NULL_TREE, long_long_unsigned_type_node,
15399 tree_cons (NULL_TREE,
15400 long_long_unsigned_type_node,
15403 /* Add all builtins that are more or less simple operations on two
operands. */
15405 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15407 /* Use one of the operands; the target can have a different mode for
15408 mask-generating compares. */
15409 enum machine_mode mode;
15415 mode = insn_data[d->icode].operand[1].mode;
15420 type = v8qi_ftype_v8qi_v8qi;
15423 type = v4hi_ftype_v4hi_v4hi;
15426 type = v2si_ftype_v2si_v2si;
15429 type = di_ftype_di_di;
15433 gcc_unreachable ();
15436 def_mbuiltin (d->mask, d->name, type, d->code);
15439 /* Add the remaining MMX insns with somewhat more complicated types. */
15440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
15441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
15442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
15444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
15445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
15446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
15447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
15448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
15449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
15451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
15452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
15453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
15454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
15455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
15456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
15458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
15459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
15460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
15461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
15462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
15463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
15465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
15466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
15467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
15468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
15469 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
15470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
15472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
15474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
15475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
15476 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
15477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
15479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
15480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
15481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
15482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
15483 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
15484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
15485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
15486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
15487 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
15489 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
15490 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
15491 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
15493 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
15494 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
15495 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
15497 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
15498 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
15499 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
15500 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
15501 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
15502 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
15504 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
15505 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
15506 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
15507 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
15508 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
15509 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
15510 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
15511 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
15512 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
15513 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
15514 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
15515 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
15517 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
15518 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
15519 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
15520 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
15522 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
15523 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
15524 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
15525 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
15526 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
15527 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
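/* A user-level sketch of how these builtins are reached (the mmintrin.h
   mapping shown is illustrative): _mm_add_pi8 (a, b) on two __m64 values
   expands to __builtin_arm_waddb (a, b), i.e. a single waddb
   instruction.  */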
15528 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
15532 arm_init_tls_builtins (void)
15536 ftype = build_function_type (ptr_type_node, void_list_node);
15537 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
15538 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
15540 TREE_NOTHROW (decl) = 1;
15541 TREE_READONLY (decl) = 1;
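/* Usage sketch: "void *tp = __builtin_thread_pointer ();" simply reads
   the thread pointer; depending on configuration this typically expands
   to a call to __aeabi_read_tp or a direct CP15 c13 register read.  */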
15544 enum neon_builtin_type_bits {
15560 #define v8qi_UP T_V8QI
15561 #define v4hi_UP T_V4HI
15562 #define v2si_UP T_V2SI
15563 #define v2sf_UP T_V2SF
15565 #define v16qi_UP T_V16QI
15566 #define v8hi_UP T_V8HI
15567 #define v4si_UP T_V4SI
15568 #define v4sf_UP T_V4SF
15569 #define v2di_UP T_V2DI
15574 #define UP(X) X##_UP
15609 NEON_LOADSTRUCTLANE,
15611 NEON_STORESTRUCTLANE,
15620 const neon_itype itype;
15622 const enum insn_code codes[T_MAX];
15623 const unsigned int num_vars;
15624 unsigned int base_fcode;
15625 } neon_builtin_datum;
15627 #define CF(N,X) CODE_FOR_neon_##N##X
15629 #define VAR1(T, N, A) \
15630 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
15631 #define VAR2(T, N, A, B) \
15632 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
15633 #define VAR3(T, N, A, B, C) \
15634 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
15635 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
15636 #define VAR4(T, N, A, B, C, D) \
15637 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
15638 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
15639 #define VAR5(T, N, A, B, C, D, E) \
15640 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
15641 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
15642 #define VAR6(T, N, A, B, C, D, E, F) \
15643 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
15644 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
15645 #define VAR7(T, N, A, B, C, D, E, F, G) \
15646 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
15647 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15648 CF (N, G) }, 7, 0
15649 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
15650 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15651 | UP (H), \
15652 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15653 CF (N, G), CF (N, H) }, 8, 0
15654 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
15655 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15656 | UP (H) | UP (I), \
15657 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15658 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
15659 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
15660 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15661 | UP (H) | UP (I) | UP (J), \
15662 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15663 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
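/* For illustration (an editorial sketch, not part of the original
   sources): each VARn macro expands to the initializer of one
   neon_builtin_datum.  For example,

     VAR2 (BINOP, vqdmull, v4hi, v2si)

   expands to

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. the name stem, the builtin's shape, the bitmask of supported
   variants, the matching insn codes, the variant count, and a base
   fcode of zero (assigned for real in arm_init_neon_builtins).  */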
15665 /* The mode entries in the following table correspond to the "key" type of the
15666 instruction variant, i.e. equivalent to that which would be specified after
15667 the assembler mnemonic, which usually refers to the last vector operand.
15668 (Signed, unsigned and polynomial types are not distinguished, however; they
15669 are all mapped onto the same mode for a given element size.) The modes
15670 listed per instruction should be the same as those defined for that
15671 instruction's pattern in neon.md.
15672 WARNING: Variants should be listed in the same increasing order as
15673 neon_builtin_type_bits. */
15675 static neon_builtin_datum neon_builtin_data[] =
15676 {
15677 { VAR10 (BINOP, vadd,
15678 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15679 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
15680 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
15681 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15682 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15683 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
15684 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15685 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15686 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
15687 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15688 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
15689 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
15690 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
15691 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
15692 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
15693 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
15694 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
15695 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
15696 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
15697 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
15698 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
15699 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
15700 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15701 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15702 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15703 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
15704 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
15705 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
15706 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15707 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15708 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15709 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
15710 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15711 { VAR10 (BINOP, vsub,
15712 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15713 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
15714 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
15715 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15716 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15717 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
15718 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15719 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15720 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15721 { VAR2 (BINOP, vcage, v2sf, v4sf) },
15722 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
15723 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15724 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15725 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
15726 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15727 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
15728 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15729 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15730 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
15731 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15732 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15733 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
15734 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
15735 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
15736 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
15737 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15738 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15739 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15740 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15741 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15742 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15743 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15744 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15745 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
15746 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
15747 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
15748 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15749 /* FIXME: vget_lane supports more variants than this! */
15750 { VAR10 (GETLANE, vget_lane,
15751 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15752 { VAR10 (SETLANE, vset_lane,
15753 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15754 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
15755 { VAR10 (DUP, vdup_n,
15756 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15757 { VAR10 (DUPLANE, vdup_lane,
15758 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15759 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
15760 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
15761 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
15762 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
15763 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
15764 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
15765 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
15766 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15767 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15768 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
15769 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
15770 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15771 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
15772 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
15773 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15774 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15775 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
15776 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
15777 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15778 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
15779 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
15780 { VAR10 (BINOP, vext,
15781 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15782 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15783 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
15784 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
15785 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
15786 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
15787 { VAR10 (SELECT, vbsl,
15788 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15789 { VAR1 (VTBL, vtbl1, v8qi) },
15790 { VAR1 (VTBL, vtbl2, v8qi) },
15791 { VAR1 (VTBL, vtbl3, v8qi) },
15792 { VAR1 (VTBL, vtbl4, v8qi) },
15793 { VAR1 (VTBX, vtbx1, v8qi) },
15794 { VAR1 (VTBX, vtbx2, v8qi) },
15795 { VAR1 (VTBX, vtbx3, v8qi) },
15796 { VAR1 (VTBX, vtbx4, v8qi) },
15797 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15798 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15799 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15800 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
15801 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
15802 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
15803 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
15804 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
15805 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
15806 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
15807 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
15808 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
15809 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
15810 { VAR10 (LOAD1, vld1,
15811 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15812 { VAR10 (LOAD1LANE, vld1_lane,
15813 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15814 { VAR10 (LOAD1, vld1_dup,
15815 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15816 { VAR10 (STORE1, vst1,
15817 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15818 { VAR10 (STORE1LANE, vst1_lane,
15819 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15820 { VAR9 (LOADSTRUCT,
15821 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15822 { VAR7 (LOADSTRUCTLANE, vld2_lane,
15823 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15824 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
15825 { VAR9 (STORESTRUCT, vst2,
15826 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15827 { VAR7 (STORESTRUCTLANE, vst2_lane,
15828 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15829 { VAR9 (LOADSTRUCT,
15830 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15831 { VAR7 (LOADSTRUCTLANE, vld3_lane,
15832 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15833 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
15834 { VAR9 (STORESTRUCT, vst3,
15835 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15836 { VAR7 (STORESTRUCTLANE, vst3_lane,
15837 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15838 { VAR9 (LOADSTRUCT, vld4,
15839 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15840 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15841 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15842 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15843 { VAR9 (STORESTRUCT, vst4,
15844 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15845 { VAR7 (STORESTRUCTLANE, vst4_lane,
15846 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15847 { VAR10 (LOGICBINOP, vand,
15848 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15849 { VAR10 (LOGICBINOP, vorr,
15850 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15851 { VAR10 (BINOP, veor,
15852 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15853 { VAR10 (LOGICBINOP, vbic,
15854 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15855 { VAR10 (LOGICBINOP, vorn,
15856 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15857 };
15872 arm_init_neon_builtins (void)
15874 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15876 tree neon_intQI_type_node;
15877 tree neon_intHI_type_node;
15878 tree neon_polyQI_type_node;
15879 tree neon_polyHI_type_node;
15880 tree neon_intSI_type_node;
15881 tree neon_intDI_type_node;
15882 tree neon_float_type_node;
15884 tree intQI_pointer_node;
15885 tree intHI_pointer_node;
15886 tree intSI_pointer_node;
15887 tree intDI_pointer_node;
15888 tree float_pointer_node;
15890 tree const_intQI_node;
15891 tree const_intHI_node;
15892 tree const_intSI_node;
15893 tree const_intDI_node;
15894 tree const_float_node;
15896 tree const_intQI_pointer_node;
15897 tree const_intHI_pointer_node;
15898 tree const_intSI_pointer_node;
15899 tree const_intDI_pointer_node;
15900 tree const_float_pointer_node;
15902 tree V8QI_type_node;
15903 tree V4HI_type_node;
15904 tree V2SI_type_node;
15905 tree V2SF_type_node;
15906 tree V16QI_type_node;
15907 tree V8HI_type_node;
15908 tree V4SI_type_node;
15909 tree V4SF_type_node;
15910 tree V2DI_type_node;
15912 tree intUQI_type_node;
15913 tree intUHI_type_node;
15914 tree intUSI_type_node;
15915 tree intUDI_type_node;
15917 tree intEI_type_node;
15918 tree intOI_type_node;
15919 tree intCI_type_node;
15920 tree intXI_type_node;
15922 tree V8QI_pointer_node;
15923 tree V4HI_pointer_node;
15924 tree V2SI_pointer_node;
15925 tree V2SF_pointer_node;
15926 tree V16QI_pointer_node;
15927 tree V8HI_pointer_node;
15928 tree V4SI_pointer_node;
15929 tree V4SF_pointer_node;
15930 tree V2DI_pointer_node;
15932 tree void_ftype_pv8qi_v8qi_v8qi;
15933 tree void_ftype_pv4hi_v4hi_v4hi;
15934 tree void_ftype_pv2si_v2si_v2si;
15935 tree void_ftype_pv2sf_v2sf_v2sf;
15936 tree void_ftype_pdi_di_di;
15937 tree void_ftype_pv16qi_v16qi_v16qi;
15938 tree void_ftype_pv8hi_v8hi_v8hi;
15939 tree void_ftype_pv4si_v4si_v4si;
15940 tree void_ftype_pv4sf_v4sf_v4sf;
15941 tree void_ftype_pv2di_v2di_v2di;
15943 tree reinterp_ftype_dreg[5][5];
15944 tree reinterp_ftype_qreg[5][5];
15945 tree dreg_types[5], qreg_types[5];
15947 /* Create distinguished type nodes for NEON vector element types,
15948 and pointers to values of such types, so we can detect them later. */
15949 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15950 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15951 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15952 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15953 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15954 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15955 neon_float_type_node = make_node (REAL_TYPE);
15956 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15957 layout_type (neon_float_type_node);
15959 /* Define typedefs which exactly correspond to the modes we are basing vector
15960 types on. If you change these names you'll need to change
15961 the table used by arm_mangle_type too. */
15962 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15963 "__builtin_neon_qi");
15964 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15965 "__builtin_neon_hi");
15966 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15967 "__builtin_neon_si");
15968 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15969 "__builtin_neon_sf");
15970 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15971 "__builtin_neon_di");
15972 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15973 "__builtin_neon_poly8");
15974 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15975 "__builtin_neon_poly16");
15977 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15978 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15979 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15980 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15981 float_pointer_node = build_pointer_type (neon_float_type_node);
15983 /* Next create constant-qualified versions of the above types. */
15984 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15986 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15988 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15990 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15992 const_float_node = build_qualified_type (neon_float_type_node,
15995 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15996 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15997 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15998 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15999 const_float_pointer_node = build_pointer_type (const_float_node);
16001 /* Now create vector types based on our NEON element types. */
16002 /* 64-bit vectors.  */
16004 V8QI_type_node = build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
16006 V4HI_type_node = build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
16008 V2SI_type_node = build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
16010 V2SF_type_node = build_vector_type_for_mode (neon_float_type_node, V2SFmode);
16011 /* 128-bit vectors.  */
16013 V16QI_type_node = build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
16015 V8HI_type_node = build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
16017 V4SI_type_node = build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
16019 V4SF_type_node = build_vector_type_for_mode (neon_float_type_node, V4SFmode);
16021 V2DI_type_node = build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
16023 /* Unsigned integer types for various mode sizes. */
16024 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
16025 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
16026 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
16027 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
16029 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
16030 "__builtin_neon_uqi");
16031 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
16032 "__builtin_neon_uhi");
16033 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
16034 "__builtin_neon_usi");
16035 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
16036 "__builtin_neon_udi");
16038 /* Opaque integer types for structures of vectors. */
16039 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
16040 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
16041 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
16042 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
16044 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
16045 "__builtin_neon_ti");
16046 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
16047 "__builtin_neon_ei");
16048 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
16049 "__builtin_neon_oi");
16050 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
16051 "__builtin_neon_ci");
16052 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
16053 "__builtin_neon_xi");
16055 /* Pointers to vector types. */
16056 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
16057 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
16058 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
16059 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
16060 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
16061 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
16062 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
16063 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
16064 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
16066 /* Operations which return results as pairs. */
16067 void_ftype_pv8qi_v8qi_v8qi =
16068 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
16069 V8QI_type_node, NULL);
16070 void_ftype_pv4hi_v4hi_v4hi =
16071 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
16072 V4HI_type_node, NULL);
16073 void_ftype_pv2si_v2si_v2si =
16074 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
16075 V2SI_type_node, NULL);
16076 void_ftype_pv2sf_v2sf_v2sf =
16077 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
16078 V2SF_type_node, NULL);
16079 void_ftype_pdi_di_di =
16080 build_function_type_list (void_type_node, intDI_pointer_node,
16081 neon_intDI_type_node, neon_intDI_type_node, NULL);
16082 void_ftype_pv16qi_v16qi_v16qi =
16083 build_function_type_list (void_type_node, V16QI_pointer_node,
16084 V16QI_type_node, V16QI_type_node, NULL);
16085 void_ftype_pv8hi_v8hi_v8hi =
16086 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
16087 V8HI_type_node, NULL);
16088 void_ftype_pv4si_v4si_v4si =
16089 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
16090 V4SI_type_node, NULL);
16091 void_ftype_pv4sf_v4sf_v4sf =
16092 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
16093 V4SF_type_node, NULL);
16094 void_ftype_pv2di_v2di_v2di =
16095 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
16096 V2DI_type_node, NULL);
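/* For illustration (an editorial sketch, not part of the original
   sources): the "result pair" types above describe intrinsics such as
   vtrn which return two vectors through a pointer; e.g.
   void_ftype_pv8qi_v8qi_v8qi becomes the type of
   __builtin_neon_vtrnv8qi, roughly

     void __builtin_neon_vtrnv8qi (int8x8_t *dest, int8x8_t a,
                                   int8x8_t b);

   with arm_neon.h unpacking *dest into an int8x8x2_t result.  */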
16098 dreg_types[0] = V8QI_type_node;
16099 dreg_types[1] = V4HI_type_node;
16100 dreg_types[2] = V2SI_type_node;
16101 dreg_types[3] = V2SF_type_node;
16102 dreg_types[4] = neon_intDI_type_node;
16104 qreg_types[0] = V16QI_type_node;
16105 qreg_types[1] = V8HI_type_node;
16106 qreg_types[2] = V4SI_type_node;
16107 qreg_types[3] = V4SF_type_node;
16108 qreg_types[4] = V2DI_type_node;
16110 for (i = 0; i < 5; i++)
16113 for (j = 0; j < 5; j++)
16115 reinterp_ftype_dreg[i][j]
16116 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
16117 reinterp_ftype_qreg[i][j]
16118 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
16122 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
16124 neon_builtin_datum *d = &neon_builtin_data[i];
16125 unsigned int j, codeidx = 0;
16127 d->base_fcode = fcode;
16129 for (j = 0; j < T_MAX; j++)
16131 const char* const modenames[] = {
16132 "v8qi", "v4hi", "v2si", "v2sf", "di",
16133 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
16137 enum insn_code icode;
16138 int is_load = 0, is_store = 0;
16140 if ((d->bits & (1 << j)) == 0)
16143 icode = d->codes[codeidx++];
16148 case NEON_LOAD1LANE:
16149 case NEON_LOADSTRUCT:
16150 case NEON_LOADSTRUCTLANE:
16152 /* Fall through. */
16154 case NEON_STORE1LANE:
16155 case NEON_STORESTRUCT:
16156 case NEON_STORESTRUCTLANE:
16159 /* Fall through. */
16162 case NEON_LOGICBINOP:
16163 case NEON_SHIFTINSERT:
16170 case NEON_SHIFTIMM:
16171 case NEON_SHIFTACC:
16177 case NEON_LANEMULL:
16178 case NEON_LANEMULH:
16180 case NEON_SCALARMUL:
16181 case NEON_SCALARMULL:
16182 case NEON_SCALARMULH:
16183 case NEON_SCALARMAC:
16189 tree return_type = void_type_node, args = void_list_node;
16191 /* Build a function type directly from the insn_data for this
16192 builtin. The build_function_type() function takes care of
16193 removing duplicates for us. */
16194 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
16198 if (is_load && k == 1)
16200 /* Neon load patterns always have the memory operand
16201 (a SImode pointer) in the operand 1 position. We
16202 want a const pointer to the element type in that
16203 position.  */
16204 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16210 eltype = const_intQI_pointer_node;
16215 eltype = const_intHI_pointer_node;
16220 eltype = const_intSI_pointer_node;
16225 eltype = const_float_pointer_node;
16230 eltype = const_intDI_pointer_node;
16233 default: gcc_unreachable ();
16236 else if (is_store && k == 0)
16238 /* Similarly, Neon store patterns use operand 0 as
16239 the memory location to store to (a SImode pointer).
16240 Use a pointer to the element type of the store in
16241 that position.  */
16242 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16248 eltype = intQI_pointer_node;
16253 eltype = intHI_pointer_node;
16258 eltype = intSI_pointer_node;
16263 eltype = float_pointer_node;
16268 eltype = intDI_pointer_node;
16271 default: gcc_unreachable ();
16276 switch (insn_data[icode].operand[k].mode)
16278 case VOIDmode: eltype = void_type_node; break;
16280 case QImode: eltype = neon_intQI_type_node; break;
16281 case HImode: eltype = neon_intHI_type_node; break;
16282 case SImode: eltype = neon_intSI_type_node; break;
16283 case SFmode: eltype = neon_float_type_node; break;
16284 case DImode: eltype = neon_intDI_type_node; break;
16285 case TImode: eltype = intTI_type_node; break;
16286 case EImode: eltype = intEI_type_node; break;
16287 case OImode: eltype = intOI_type_node; break;
16288 case CImode: eltype = intCI_type_node; break;
16289 case XImode: eltype = intXI_type_node; break;
16290 /* 64-bit vectors. */
16291 case V8QImode: eltype = V8QI_type_node; break;
16292 case V4HImode: eltype = V4HI_type_node; break;
16293 case V2SImode: eltype = V2SI_type_node; break;
16294 case V2SFmode: eltype = V2SF_type_node; break;
16295 /* 128-bit vectors. */
16296 case V16QImode: eltype = V16QI_type_node; break;
16297 case V8HImode: eltype = V8HI_type_node; break;
16298 case V4SImode: eltype = V4SI_type_node; break;
16299 case V4SFmode: eltype = V4SF_type_node; break;
16300 case V2DImode: eltype = V2DI_type_node; break;
16301 default: gcc_unreachable ();
16305 if (k == 0 && !is_store)
16306 return_type = eltype;
16308 args = tree_cons (NULL_TREE, eltype, args);
16311 ftype = build_function_type (return_type, args);
16315 case NEON_RESULTPAIR:
16317 switch (insn_data[icode].operand[1].mode)
16319 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
16320 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
16321 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
16322 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
16323 case DImode: ftype = void_ftype_pdi_di_di; break;
16324 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
16325 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
16326 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
16327 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
16328 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
16329 default: gcc_unreachable ();
16334 case NEON_REINTERP:
16336 /* We iterate over 5 doubleword types, then 5 quadword
16337 types.  */
16338 int rhs = j % 5;
16339 switch (insn_data[icode].operand[0].mode)
16341 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
16342 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
16343 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
16344 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
16345 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
16346 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
16347 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
16348 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
16349 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
16350 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
16351 default: gcc_unreachable ();
16357 gcc_unreachable ();
16360 gcc_assert (ftype != NULL);
16362 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
16364 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
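/* For illustration (an editorial sketch, not part of the original
   sources): the sprintf above yields names such as

     __builtin_neon_vaddv8qi   (vadd, 64-bit V8QI variant)
     __builtin_neon_vld2v4hi   (vld2, 64-bit V4HI variant)

   which arm_neon.h wraps in the user-visible vadd_s8, vld2_s16, etc.
   intrinsics.  */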
16371 arm_init_fp16_builtins (void)
16373 tree fp16_type = make_node (REAL_TYPE);
16374 TYPE_PRECISION (fp16_type) = 16;
16375 layout_type (fp16_type);
16376 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
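/* For illustration (an editorial sketch, not part of the original
   sources): once arm_fp16_format is set the type registered above is
   usable directly from C, e.g.

     __fp16 scale = 0.5;
     float f (float x) { return x * scale; }

   with __fp16 values promoted to float by arm_promoted_type below.  */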
16380 arm_init_builtins (void)
16382 arm_init_tls_builtins ();
16384 if (TARGET_REALLY_IWMMXT)
16385 arm_init_iwmmxt_builtins ();
16387 if (TARGET_NEON)
16388 arm_init_neon_builtins ();
16390 if (arm_fp16_format)
16391 arm_init_fp16_builtins ();
16394 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
16396 static const char *
16397 arm_invalid_parameter_type (const_tree t)
16399 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16400 return N_("function parameters cannot have __fp16 type");
16404 /* Implement TARGET_INVALID_RETURN_TYPE.  */
16406 static const char *
16407 arm_invalid_return_type (const_tree t)
16409 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16410 return N_("functions cannot return __fp16 type");
16414 /* Implement TARGET_PROMOTED_TYPE. */
16417 arm_promoted_type (const_tree t)
16419 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16420 return float_type_node;
16424 /* Implement TARGET_CONVERT_TO_TYPE.
16425 Specifically, this hook implements the peculiarity of the ARM
16426 half-precision floating-point C semantics that requires conversions between
16427 __fp16 and double to go through an intermediate conversion to float.  */
16430 arm_convert_to_type (tree type, tree expr)
16432 tree fromtype = TREE_TYPE (expr);
16433 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
16435 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
16436 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
16437 return convert (type, convert (float_type_node, expr));
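/* For illustration (an editorial sketch, not part of the original
   sources): given "__fp16 h; double d = h;", the conversion built here
   is

     (double) (float) h

   rather than a single direct conversion, and "h = d" likewise becomes
   h = (__fp16) (float) d.  */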
16441 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
16442 This simply adds HFmode as a supported mode; even though we don't
16443 implement arithmetic on this type directly, it's supported by
16444 optabs conversions, much the way the double-word arithmetic is
16445 special-cased in the default hook. */
16448 arm_scalar_mode_supported_p (enum machine_mode mode)
16450 if (mode == HFmode)
16451 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
16453 return default_scalar_mode_supported_p (mode);
16456 /* Errors in the source file can cause expand_expr to return const0_rtx
16457 where we expect a vector. To avoid crashing, use one of the vector
16458 clear instructions. */
16461 safe_vector_operand (rtx x, enum machine_mode mode)
16463 if (x != const0_rtx)
16465 x = gen_reg_rtx (mode);
16467 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
16468 : gen_rtx_SUBREG (DImode, x, 0)));
16472 /* Subroutine of arm_expand_builtin to take care of binop insns. */
16475 arm_expand_binop_builtin (enum insn_code icode,
16476 tree exp, rtx target)
16479 tree arg0 = CALL_EXPR_ARG (exp, 0);
16480 tree arg1 = CALL_EXPR_ARG (exp, 1);
16481 rtx op0 = expand_normal (arg0);
16482 rtx op1 = expand_normal (arg1);
16483 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16484 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16485 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16487 if (VECTOR_MODE_P (mode0))
16488 op0 = safe_vector_operand (op0, mode0);
16489 if (VECTOR_MODE_P (mode1))
16490 op1 = safe_vector_operand (op1, mode1);
16493 || GET_MODE (target) != tmode
16494 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16495 target = gen_reg_rtx (tmode);
16497 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
16499 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16500 op0 = copy_to_mode_reg (mode0, op0);
16501 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16502 op1 = copy_to_mode_reg (mode1, op1);
16504 pat = GEN_FCN (icode) (target, op0, op1);
16511 /* Subroutine of arm_expand_builtin to take care of unop insns. */
16514 arm_expand_unop_builtin (enum insn_code icode,
16515 tree exp, rtx target, int do_load)
16518 tree arg0 = CALL_EXPR_ARG (exp, 0);
16519 rtx op0 = expand_normal (arg0);
16520 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16521 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16524 || GET_MODE (target) != tmode
16525 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16526 target = gen_reg_rtx (tmode);
16528 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16531 if (VECTOR_MODE_P (mode0))
16532 op0 = safe_vector_operand (op0, mode0);
16534 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16535 op0 = copy_to_mode_reg (mode0, op0);
16538 pat = GEN_FCN (icode) (target, op0);
16546 neon_builtin_compare (const void *a, const void *b)
16548 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
16549 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
16550 unsigned int soughtcode = key->base_fcode;
16552 if (soughtcode >= memb->base_fcode
16553 && soughtcode < memb->base_fcode + memb->num_vars)
16555 else if (soughtcode < memb->base_fcode)
16561 static enum insn_code
16562 locate_neon_builtin_icode (int fcode, neon_itype *itype)
16564 neon_builtin_datum key, *found;
16567 key.base_fcode = fcode;
16568 found = (neon_builtin_datum *)
16569 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
16570 sizeof (neon_builtin_data[0]), neon_builtin_compare);
16571 gcc_assert (found);
16572 idx = fcode - (int) found->base_fcode;
16573 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
16576 *itype = found->itype;
16578 return found->codes[idx];
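/* For illustration (an editorial sketch, not part of the original
   sources): each table entry owns the contiguous fcode range
   [base_fcode, base_fcode + num_vars).  If the vadd entry were
   assigned base_fcode 100 with its 10 variants, fcode 103 would
   bsearch to that entry and select codes[3], the V2SF variant.  */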
16582 NEON_ARG_COPY_TO_REG,
16587 #define NEON_MAX_BUILTIN_ARGS 5
16589 /* Expand a Neon builtin. */
16591 arm_expand_neon_args (rtx target, int icode, int have_retval,
16596 tree arg[NEON_MAX_BUILTIN_ARGS];
16597 rtx op[NEON_MAX_BUILTIN_ARGS];
16598 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16599 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
16604 || GET_MODE (target) != tmode
16605 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
16606 target = gen_reg_rtx (tmode);
16608 va_start (ap, exp);
16612 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
16614 if (thisarg == NEON_ARG_STOP)
16618 arg[argc] = CALL_EXPR_ARG (exp, argc);
16619 op[argc] = expand_normal (arg[argc]);
16620 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
16624 case NEON_ARG_COPY_TO_REG:
16625 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
16626 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16627 (op[argc], mode[argc]))
16628 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
16631 case NEON_ARG_CONSTANT:
16632 /* FIXME: This error message is somewhat unhelpful. */
16633 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16634 (op[argc], mode[argc]))
16635 error ("argument must be a constant");
16638 case NEON_ARG_STOP:
16639 gcc_unreachable ();
16652 pat = GEN_FCN (icode) (target, op[0]);
16656 pat = GEN_FCN (icode) (target, op[0], op[1]);
16660 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
16664 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
16668 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
16672 gcc_unreachable ();
16678 pat = GEN_FCN (icode) (op[0]);
16682 pat = GEN_FCN (icode) (op[0], op[1]);
16686 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
16690 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
16694 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
16698 gcc_unreachable ();
16709 /* Expand a Neon builtin. These are "special" because they don't have symbolic
16710 constants defined per-instruction or per instruction-variant. Instead, the
16711 required info is looked up in the table neon_builtin_data. */
16713 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
16716 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
16723 return arm_expand_neon_args (target, icode, 1, exp,
16724 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16728 case NEON_SCALARMUL:
16729 case NEON_SCALARMULL:
16730 case NEON_SCALARMULH:
16731 case NEON_SHIFTINSERT:
16732 case NEON_LOGICBINOP:
16733 return arm_expand_neon_args (target, icode, 1, exp,
16734 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16738 return arm_expand_neon_args (target, icode, 1, exp,
16739 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16740 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16744 case NEON_SHIFTIMM:
16745 return arm_expand_neon_args (target, icode, 1, exp,
16746 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
16750 return arm_expand_neon_args (target, icode, 1, exp,
16751 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16755 case NEON_REINTERP:
16756 return arm_expand_neon_args (target, icode, 1, exp,
16757 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16761 return arm_expand_neon_args (target, icode, 1, exp,
16762 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16764 case NEON_RESULTPAIR:
16765 return arm_expand_neon_args (target, icode, 0, exp,
16766 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16770 case NEON_LANEMULL:
16771 case NEON_LANEMULH:
16772 return arm_expand_neon_args (target, icode, 1, exp,
16773 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16774 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16777 return arm_expand_neon_args (target, icode, 1, exp,
16778 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16779 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16781 case NEON_SHIFTACC:
16782 return arm_expand_neon_args (target, icode, 1, exp,
16783 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16784 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16786 case NEON_SCALARMAC:
16787 return arm_expand_neon_args (target, icode, 1, exp,
16788 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16789 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16793 return arm_expand_neon_args (target, icode, 1, exp,
16794 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16798 case NEON_LOADSTRUCT:
16799 return arm_expand_neon_args (target, icode, 1, exp,
16800 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16802 case NEON_LOAD1LANE:
16803 case NEON_LOADSTRUCTLANE:
16804 return arm_expand_neon_args (target, icode, 1, exp,
16805 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16809 case NEON_STORESTRUCT:
16810 return arm_expand_neon_args (target, icode, 0, exp,
16811 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16813 case NEON_STORE1LANE:
16814 case NEON_STORESTRUCTLANE:
16815 return arm_expand_neon_args (target, icode, 0, exp,
16816 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16820 gcc_unreachable ();
16823 /* Emit code to reinterpret one Neon type as another, without altering bits. */
16825 neon_reinterpret (rtx dest, rtx src)
16827 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
16830 /* Emit code to place a Neon pair result in memory locations (with equal
16833 neon_emit_pair_result_insn (enum machine_mode mode,
16834 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
16837 rtx mem = gen_rtx_MEM (mode, destaddr);
16838 rtx tmp1 = gen_reg_rtx (mode);
16839 rtx tmp2 = gen_reg_rtx (mode);
16841 emit_insn (intfn (tmp1, op1, tmp2, op2));
16843 emit_move_insn (mem, tmp1);
16844 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
16845 emit_move_insn (mem, tmp2);
16848 /* Set up operands for a register copy from src to dest, taking care not to
16849 clobber registers in the process.
16850 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
16851 be called with a large N, so that should be OK. */
16854 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
16856 unsigned int copied = 0, opctr = 0;
16857 unsigned int done = (1 << count) - 1;
16860 while (copied != done)
16862 for (i = 0; i < count; i++)
16866 for (j = 0; good && j < count; j++)
16867 if (i != j && (copied & (1 << j)) == 0
16868 && reg_overlap_mentioned_p (src[j], dest[i]))
16873 operands[opctr++] = dest[i];
16874 operands[opctr++] = src[i];
16880 gcc_assert (opctr == count * 2);
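/* For illustration (an editorial sketch, not part of the original
   sources; register names hypothetical): with dest = {d0, d1} and
   src = {d1, d2}, the copy d1 := d2 must not be emitted before
   d0 := d1 has read d1; the loop above detects the overlap and orders
   the operand pairs as

     d0, d1,  d1, d2

   so every destination is written only after all overlapping sources
   have been consumed.  */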
16883 /* Expand an expression EXP that calls a built-in function,
16884 with result going to TARGET if that's convenient
16885 (and in mode MODE if that's convenient).
16886 SUBTARGET may be used as the target for computing one of EXP's operands.
16887 IGNORE is nonzero if the value is to be ignored. */
16890 arm_expand_builtin (tree exp,
16892 rtx subtarget ATTRIBUTE_UNUSED,
16893 enum machine_mode mode ATTRIBUTE_UNUSED,
16894 int ignore ATTRIBUTE_UNUSED)
16896 const struct builtin_description * d;
16897 enum insn_code icode;
16898 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16906 int fcode = DECL_FUNCTION_CODE (fndecl);
16908 enum machine_mode tmode;
16909 enum machine_mode mode0;
16910 enum machine_mode mode1;
16911 enum machine_mode mode2;
16913 if (fcode >= ARM_BUILTIN_NEON_BASE)
16914 return arm_expand_neon_builtin (fcode, exp, target);
16918 case ARM_BUILTIN_TEXTRMSB:
16919 case ARM_BUILTIN_TEXTRMUB:
16920 case ARM_BUILTIN_TEXTRMSH:
16921 case ARM_BUILTIN_TEXTRMUH:
16922 case ARM_BUILTIN_TEXTRMSW:
16923 case ARM_BUILTIN_TEXTRMUW:
16924 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16925 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16926 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16927 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16928 : CODE_FOR_iwmmxt_textrmw);
16930 arg0 = CALL_EXPR_ARG (exp, 0);
16931 arg1 = CALL_EXPR_ARG (exp, 1);
16932 op0 = expand_normal (arg0);
16933 op1 = expand_normal (arg1);
16934 tmode = insn_data[icode].operand[0].mode;
16935 mode0 = insn_data[icode].operand[1].mode;
16936 mode1 = insn_data[icode].operand[2].mode;
16938 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16939 op0 = copy_to_mode_reg (mode0, op0);
16940 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16942 /* @@@ better error message */
16943 error ("selector must be an immediate");
16944 return gen_reg_rtx (tmode);
16947 || GET_MODE (target) != tmode
16948 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16949 target = gen_reg_rtx (tmode);
16950 pat = GEN_FCN (icode) (target, op0, op1);
16956 case ARM_BUILTIN_TINSRB:
16957 case ARM_BUILTIN_TINSRH:
16958 case ARM_BUILTIN_TINSRW:
16959 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16960 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16961 : CODE_FOR_iwmmxt_tinsrw);
16962 arg0 = CALL_EXPR_ARG (exp, 0);
16963 arg1 = CALL_EXPR_ARG (exp, 1);
16964 arg2 = CALL_EXPR_ARG (exp, 2);
16965 op0 = expand_normal (arg0);
16966 op1 = expand_normal (arg1);
16967 op2 = expand_normal (arg2);
16968 tmode = insn_data[icode].operand[0].mode;
16969 mode0 = insn_data[icode].operand[1].mode;
16970 mode1 = insn_data[icode].operand[2].mode;
16971 mode2 = insn_data[icode].operand[3].mode;
16973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16974 op0 = copy_to_mode_reg (mode0, op0);
16975 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16976 op1 = copy_to_mode_reg (mode1, op1);
16977 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16979 /* @@@ better error message */
16980 error ("selector must be an immediate");
16984 || GET_MODE (target) != tmode
16985 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16986 target = gen_reg_rtx (tmode);
16987 pat = GEN_FCN (icode) (target, op0, op1, op2);
16993 case ARM_BUILTIN_SETWCX:
16994 arg0 = CALL_EXPR_ARG (exp, 0);
16995 arg1 = CALL_EXPR_ARG (exp, 1);
16996 op0 = force_reg (SImode, expand_normal (arg0));
16997 op1 = expand_normal (arg1);
16998 emit_insn (gen_iwmmxt_tmcr (op1, op0));
17001 case ARM_BUILTIN_GETWCX:
17002 arg0 = CALL_EXPR_ARG (exp, 0);
17003 op0 = expand_normal (arg0);
17004 target = gen_reg_rtx (SImode);
17005 emit_insn (gen_iwmmxt_tmrc (target, op0));
17008 case ARM_BUILTIN_WSHUFH:
17009 icode = CODE_FOR_iwmmxt_wshufh;
17010 arg0 = CALL_EXPR_ARG (exp, 0);
17011 arg1 = CALL_EXPR_ARG (exp, 1);
17012 op0 = expand_normal (arg0);
17013 op1 = expand_normal (arg1);
17014 tmode = insn_data[icode].operand[0].mode;
17015 mode1 = insn_data[icode].operand[1].mode;
17016 mode2 = insn_data[icode].operand[2].mode;
17018 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17019 op0 = copy_to_mode_reg (mode1, op0);
17020 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17022 /* @@@ better error message */
17023 error ("mask must be an immediate");
17027 || GET_MODE (target) != tmode
17028 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17029 target = gen_reg_rtx (tmode);
17030 pat = GEN_FCN (icode) (target, op0, op1);
17036 case ARM_BUILTIN_WSADB:
17037 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
17038 case ARM_BUILTIN_WSADH:
17039 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
17040 case ARM_BUILTIN_WSADBZ:
17041 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
17042 case ARM_BUILTIN_WSADHZ:
17043 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
17045 /* Several three-argument builtins. */
17046 case ARM_BUILTIN_WMACS:
17047 case ARM_BUILTIN_WMACU:
17048 case ARM_BUILTIN_WALIGN:
17049 case ARM_BUILTIN_TMIA:
17050 case ARM_BUILTIN_TMIAPH:
17051 case ARM_BUILTIN_TMIATT:
17052 case ARM_BUILTIN_TMIATB:
17053 case ARM_BUILTIN_TMIABT:
17054 case ARM_BUILTIN_TMIABB:
17055 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
17056 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
17057 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
17058 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
17059 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
17060 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
17061 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
17062 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
17063 : CODE_FOR_iwmmxt_walign);
17064 arg0 = CALL_EXPR_ARG (exp, 0);
17065 arg1 = CALL_EXPR_ARG (exp, 1);
17066 arg2 = CALL_EXPR_ARG (exp, 2);
17067 op0 = expand_normal (arg0);
17068 op1 = expand_normal (arg1);
17069 op2 = expand_normal (arg2);
17070 tmode = insn_data[icode].operand[0].mode;
17071 mode0 = insn_data[icode].operand[1].mode;
17072 mode1 = insn_data[icode].operand[2].mode;
17073 mode2 = insn_data[icode].operand[3].mode;
17075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17076 op0 = copy_to_mode_reg (mode0, op0);
17077 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17078 op1 = copy_to_mode_reg (mode1, op1);
17079 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17080 op2 = copy_to_mode_reg (mode2, op2);
17082 || GET_MODE (target) != tmode
17083 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17084 target = gen_reg_rtx (tmode);
17085 pat = GEN_FCN (icode) (target, op0, op1, op2);
17091 case ARM_BUILTIN_WZERO:
17092 target = gen_reg_rtx (DImode);
17093 emit_insn (gen_iwmmxt_clrdi (target));
17096 case ARM_BUILTIN_THREAD_POINTER:
17097 return arm_load_tp (target);
17103 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17104 if (d->code == (const enum arm_builtins) fcode)
17105 return arm_expand_binop_builtin (d->icode, exp, target);
17107 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17108 if (d->code == (const enum arm_builtins) fcode)
17109 return arm_expand_unop_builtin (d->icode, exp, target, 0);
17111 /* @@@ Should really do something sensible here. */
17115 /* Return the number (counting from 0) of
17116 the least significant set bit in MASK. */
17119 number_of_first_bit_set (unsigned mask)
17124 (mask & (1 << bit)) == 0;
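/* For illustration (an editorial sketch, not part of the original
   sources): number_of_first_bit_set (0x0c) scans upwards from bit 0
   and returns 2, the index of the lowest set bit.  */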
17131 /* Emit code to push or pop registers to or from the stack. F is the
17132 assembly file. MASK is the registers to push or pop. PUSH is
17133 nonzero if we should push, and zero if we should pop. For debugging
17134 output, if pushing, adjust CFA_OFFSET by the amount of space added
17135 to the stack. REAL_REGS should have the same number of bits set as
17136 MASK, and will be used instead (in the same order) to describe which
17137 registers were saved - this is used to mark the save slots when we
17138 push high registers after moving them to low registers. */
17140 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
17141 unsigned long real_regs)
17144 int lo_mask = mask & 0xFF;
17145 int pushed_words = 0;
17149 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
17151 /* Special case.  Do not generate a POP PC statement here, do it in
17152 thumb_exit().  */
17153 thumb_exit (f, -1);
17157 if (ARM_EABI_UNWIND_TABLES && push)
17159 fprintf (f, "\t.save\t{");
17160 for (regno = 0; regno < 15; regno++)
17162 if (real_regs & (1 << regno))
17164 if (real_regs & ((1 << regno) - 1))
17165 fprintf (f, ", ");
17166 asm_fprintf (f, "%r", regno);
17169 fprintf (f, "}\n");
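/* For illustration (an editorial sketch, not part of the original
   sources): for a prologue saving r4, r5 and lr with EABI unwind
   tables enabled, the .save loop above and the push emitted just
   below produce

     .save {r4, r5, lr}
     push  {r4, r5, lr}  */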
17172 fprintf (f, "\t%s\t{", push ? "push" : "pop");
17174 /* Look at the low registers first. */
17175 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
17179 asm_fprintf (f, "%r", regno);
17181 if ((lo_mask & ~1) != 0)
17188 if (push && (mask & (1 << LR_REGNUM)))
17190 /* Catch pushing the LR. */
17194 asm_fprintf (f, "%r", LR_REGNUM);
17198 else if (!push && (mask & (1 << PC_REGNUM)))
17200 /* Catch popping the PC. */
17201 if (TARGET_INTERWORK || TARGET_BACKTRACE
17202 || crtl->calls_eh_return)
17204 /* The PC is never popped directly; instead
17205 it is popped into r3 and then BX is used. */
17206 fprintf (f, "}\n");
17208 thumb_exit (f, -1);
17217 asm_fprintf (f, "%r", PC_REGNUM);
17221 fprintf (f, "}\n");
17223 if (push && pushed_words && dwarf2out_do_frame ())
17225 char *l = dwarf2out_cfi_label (false);
17226 int pushed_mask = real_regs;
17228 *cfa_offset += pushed_words * 4;
17229 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
17232 pushed_mask = real_regs;
17233 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
17235 if (pushed_mask & 1)
17236 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
17241 /* Generate code to return from a thumb function.
17242 If 'reg_containing_return_addr' is -1, then the return address is
17243 actually on the stack, at the stack pointer. */
17245 thumb_exit (FILE *f, int reg_containing_return_addr)
17247 unsigned regs_available_for_popping;
17248 unsigned regs_to_pop;
17250 unsigned available;
17254 int restore_a4 = FALSE;
17256 /* Compute the registers we need to pop. */
17260 if (reg_containing_return_addr == -1)
17262 regs_to_pop |= 1 << LR_REGNUM;
17266 if (TARGET_BACKTRACE)
17268 /* Restore the (ARM) frame pointer and stack pointer. */
17269 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
17273 /* If there is nothing to pop then just emit the BX instruction and
17275 if (pops_needed == 0)
17277 if (crtl->calls_eh_return)
17278 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17280 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17283 /* Otherwise if we are not supporting interworking and we have not created
17284 a backtrace structure and the function was not entered in ARM mode then
17285 just pop the return address straight into the PC. */
17286 else if (!TARGET_INTERWORK
17287 && !TARGET_BACKTRACE
17288 && !is_called_in_ARM_mode (current_function_decl)
17289 && !crtl->calls_eh_return)
17291 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
17295 /* Find out how many of the (return) argument registers we can corrupt. */
17296 regs_available_for_popping = 0;
17298 /* If returning via __builtin_eh_return, the bottom three registers
17299 all contain information needed for the return. */
17300 if (crtl->calls_eh_return)
17304 /* If we can deduce the registers used from the function's
17305 return value.  This is more reliable than examining
17306 df_regs_ever_live_p () because that will be set if the register is
17307 ever used in the function, not just if the register is used
17308 to hold a return value. */
17310 if (crtl->return_rtx != 0)
17311 mode = GET_MODE (crtl->return_rtx);
17313 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17315 size = GET_MODE_SIZE (mode);
17319 /* In a void function we can use any argument register.
17320 In a function that returns a structure on the stack
17321 we can use the second and third argument registers. */
17322 if (mode == VOIDmode)
17323 regs_available_for_popping =
17324 (1 << ARG_REGISTER (1))
17325 | (1 << ARG_REGISTER (2))
17326 | (1 << ARG_REGISTER (3));
17328 regs_available_for_popping =
17329 (1 << ARG_REGISTER (2))
17330 | (1 << ARG_REGISTER (3));
17332 else if (size <= 4)
17333 regs_available_for_popping =
17334 (1 << ARG_REGISTER (2))
17335 | (1 << ARG_REGISTER (3));
17336 else if (size <= 8)
17337 regs_available_for_popping =
17338 (1 << ARG_REGISTER (3));
17341 /* Match registers to be popped with registers into which we pop them. */
17342 for (available = regs_available_for_popping,
17343 required = regs_to_pop;
17344 required != 0 && available != 0;
17345 available &= ~(available & - available),
17346 required &= ~(required & - required))
17349 /* If we have any popping registers left over, remove them. */
17351 regs_available_for_popping &= ~available;
17353 /* Otherwise if we need another popping register we can use
17354 the fourth argument register. */
17355 else if (pops_needed)
17357 /* If we have not found any free argument registers and
17358 reg a4 contains the return address, we must move it. */
17359 if (regs_available_for_popping == 0
17360 && reg_containing_return_addr == LAST_ARG_REGNUM)
17362 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17363 reg_containing_return_addr = LR_REGNUM;
17365 else if (size > 12)
17367 /* Register a4 is being used to hold part of the return value,
17368 but we have dire need of a free, low register. */
17371 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
17374 if (reg_containing_return_addr != LAST_ARG_REGNUM)
17376 /* The fourth argument register is available. */
17377 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
17383 /* Pop as many registers as we can. */
17384 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17385 regs_available_for_popping);
17387 /* Process the registers we popped. */
17388 if (reg_containing_return_addr == -1)
17390 /* The return address was popped into the lowest numbered register. */
17391 regs_to_pop &= ~(1 << LR_REGNUM);
17393 reg_containing_return_addr =
17394 number_of_first_bit_set (regs_available_for_popping);
17396 /* Remove this register from the mask of available registers, so that
17397 the return address will not be corrupted by further pops. */
17398 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
17401 /* If we popped other registers then handle them here. */
17402 if (regs_available_for_popping)
17406 /* Work out which register currently contains the frame pointer. */
17407 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
17409 /* Move it into the correct place. */
17410 asm_fprintf (f, "\tmov\t%r, %r\n",
17411 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
17413 /* (Temporarily) remove it from the mask of popped registers. */
17414 regs_available_for_popping &= ~(1 << frame_pointer);
17415 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
17417 if (regs_available_for_popping)
17421 /* We popped the stack pointer as well;
17422 find the register that contains it. */
17423 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
17425 /* Move it into the stack register. */
17426 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
17428 /* At this point we have popped all necessary registers, so
17429 do not worry about restoring regs_available_for_popping
17430 to its correct value:
17432 assert (pops_needed == 0)
17433 assert (regs_available_for_popping == (1 << frame_pointer))
17434 assert (regs_to_pop == (1 << STACK_POINTER)) */
17438 /* Since we have just moved the popped value into the frame
17439 pointer, the popping register is available for reuse, and
17440 we know that we still have the stack pointer left to pop. */
17441 regs_available_for_popping |= (1 << frame_pointer);
17445 /* If we still have registers left on the stack, but we no longer have
17446 any registers into which we can pop them, then we must move the return
17447 address into the link register and make available the register that
17449 if (regs_available_for_popping == 0 && pops_needed > 0)
17451 regs_available_for_popping |= 1 << reg_containing_return_addr;
17453 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
17454 reg_containing_return_addr);
17456 reg_containing_return_addr = LR_REGNUM;
17459 /* If we have registers left on the stack then pop some more.
17460 We know that at most we will want to pop FP and SP. */
17461 if (pops_needed > 0)
17466 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17467 regs_available_for_popping);
17469 /* We have popped either FP or SP.
17470 Move whichever one it is into the correct register. */
17471 popped_into = number_of_first_bit_set (regs_available_for_popping);
17472 move_to = number_of_first_bit_set (regs_to_pop);
17474 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
17476 regs_to_pop &= ~(1 << move_to);
17481 /* If we still have not popped everything then we must have only
17482 had one register available to us and we are now popping the SP. */
17483 if (pops_needed > 0)
17487 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17488 regs_available_for_popping);
17490 popped_into = number_of_first_bit_set (regs_available_for_popping);
17492 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
17494 assert (regs_to_pop == (1 << STACK_POINTER))
17495 assert (pops_needed == 1)
17499 /* If necessary restore the a4 register. */
17502 if (reg_containing_return_addr != LR_REGNUM)
17504 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17505 reg_containing_return_addr = LR_REGNUM;
17508 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
17511 if (crtl->calls_eh_return)
17512 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17514 /* Return to caller. */
17515 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17520 thumb1_final_prescan_insn (rtx insn)
17522 if (flag_print_asm_name)
17523 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
17524 INSN_ADDRESSES (INSN_UID (insn)));
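/* A worked example of the test below (an illustrative note, not from
   the original source): 0x000ff000 is accepted because it equals
   0xff << 12, i.e. all of its set bits fit in one shifted 8-bit
   window, while 0x80000001 is rejected because bits 0 and 31 can
   never fit in the same window.  */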
17528 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
17530 unsigned HOST_WIDE_INT mask = 0xff;
17533 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
17534 if (val == 0) /* XXX */
17537 for (i = 0; i < 25; i++)
17538 if ((val & (mask << i)) == val)
17544 /* Returns nonzero if the current function contains,
17545 or might contain, a far jump. */
17547 thumb_far_jump_used_p (void)
17551 /* This test is only important for leaf functions. */
17552 /* assert (!leaf_function_p ()); */
17554 /* If we have already decided that far jumps may be used,
17555 do not bother checking again, and always return true even if
17556 it turns out that they are not being used. Once we have made
17557 the decision that far jumps are present (and that hence the link
17558 register will be pushed onto the stack) we cannot go back on it. */
17559 if (cfun->machine->far_jump_used)
17562 /* If this function is not being called from the prologue/epilogue
17563 generation code then it must be being called from the
17564 INITIAL_ELIMINATION_OFFSET macro. */
17565 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
17567 /* In this case we know that we are being asked about the elimination
17568 of the arg pointer register. If that register is not being used,
17569 then there are no arguments on the stack, and we do not have to
17570 worry that a far jump might force the prologue to push the link
17571 register, changing the stack offsets. In this case we can just
17572 return false, since the presence of far jumps in the function will
17573 not affect stack offsets.
17575 If the arg pointer is live (or if it was live, but has now been
17576 eliminated and so set to dead) then we do have to test to see if
17577 the function might contain a far jump. This test can lead to some
17578 false negatives, since before reload is completed, the length of
17579 branch instructions is not known, so gcc defaults to returning their
17580 longest length, which in turn sets the far jump attribute to true.
17582 A false negative will not result in bad code being generated, but it
17583 will result in a needless push and pop of the link register. We
17584 hope that this does not occur too often.
17586 If we need doubleword stack alignment this could affect the other
17587 elimination offsets so we can't risk getting it wrong. */
17588 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
17589 cfun->machine->arg_pointer_live = 1;
17590 else if (!cfun->machine->arg_pointer_live)
17594 /* Check to see if the function contains a branch
17595 insn with the far jump attribute set. */
17596 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17598 if (GET_CODE (insn) == JUMP_INSN
17599 /* Ignore tablejump patterns. */
17600 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17601 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
17602 && get_attr_far_jump (insn) == FAR_JUMP_YES
17605 /* Record the fact that we have decided that
17606 the function does use far jumps. */
17607 cfun->machine->far_jump_used = 1;
17615 /* Return nonzero if FUNC must be entered in ARM mode. */
17617 is_called_in_ARM_mode (tree func)
17619 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
17621 /* Ignore the problem of functions whose address is taken. */
17622 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
17626 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
17632 /* The bits which aren't usefully expanded as rtl. */
17634 thumb_unexpanded_epilogue (void)
17636 arm_stack_offsets *offsets;
17638 unsigned long live_regs_mask = 0;
17639 int high_regs_pushed = 0;
17640 int had_to_push_lr;
17643 if (cfun->machine->return_used_this_function != 0)
17646 if (IS_NAKED (arm_current_func_type ()))
17649 offsets = arm_get_frame_offsets ();
17650 live_regs_mask = offsets->saved_regs_mask;
17651 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17653 /* We can deduce the registers used from the function's return value.
17654 This is more reliable than examining df_regs_ever_live_p () because that
17655 will be set if the register is ever used in the function, not just if
17656 the register is used to hold a return value. */
17657 size = arm_size_return_regs ();
17659 /* The prolog may have pushed some high registers to use as
17660 work registers. e.g. the testsuite file:
17661 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
17662 compiles to produce:
17663 push {r4, r5, r6, r7, lr}
17664 mov r7, r9
17665 mov r6, r8
17666 push {r6, r7}
17667 as part of the prolog. We have to undo that pushing here. */
17669 if (high_regs_pushed)
17671 unsigned long mask = live_regs_mask & 0xff;
17674 /* The available low registers depend on the size of the value we are returning. */
17682 /* Oh dear! We have no low registers into which we can pop
17683 high registers! */
17684 internal_error
17685 ("no low registers available for popping high registers");
17687 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
17688 if (live_regs_mask & (1 << next_hi_reg))
17691 while (high_regs_pushed)
17693 /* Find lo register(s) into which the high register(s) can be popped. */
17695 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17697 if (mask & (1 << regno))
17698 high_regs_pushed--;
17699 if (high_regs_pushed == 0)
17703 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
17705 /* Pop the values into the low register(s). */
17706 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
17708 /* Move the value(s) into the high registers. */
17709 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17711 if (mask & (1 << regno))
17713 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
17716 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
17717 if (live_regs_mask & (1 << next_hi_reg))
17722 live_regs_mask &= ~0x0f00;
17725 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
17726 live_regs_mask &= 0xff;
17728 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
17730 /* Pop the return address into the PC. */
17731 if (had_to_push_lr)
17732 live_regs_mask |= 1 << PC_REGNUM;
17734 /* Either no argument registers were pushed or a backtrace
17735 structure was created which includes an adjusted stack
17736 pointer, so just pop everything. */
17737 if (live_regs_mask)
17738 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17741 /* We have either just popped the return address into the
17742 PC or it was kept in LR for the entire function. */
17743 if (!had_to_push_lr)
17744 thumb_exit (asm_out_file, LR_REGNUM);
17748 /* Pop everything but the return address. */
17749 if (live_regs_mask)
17750 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17753 if (had_to_push_lr)
17757 /* We have no free low regs, so save one. */
17758 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
17762 /* Get the return address into a temporary register. */
17763 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
17764 1 << LAST_ARG_REGNUM);
17768 /* Move the return address to lr. */
17769 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
17771 /* Restore the low register. */
17772 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
17777 regno = LAST_ARG_REGNUM;
17782 /* Remove the argument registers that were pushed onto the stack. */
17783 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
17784 SP_REGNUM, SP_REGNUM,
17785 crtl->args.pretend_args_size);
17787 thumb_exit (asm_out_file, regno);
17793 /* Functions to save and restore machine-specific function data. */
17794 static struct machine_function *
17795 arm_init_machine_status (void)
17797 struct machine_function *machine;
17798 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
17800 #if ARM_FT_UNKNOWN != 0
17801 machine->func_type = ARM_FT_UNKNOWN;
17806 /* Return an RTX indicating where the return address to the
17807 calling function can be found. */
17809 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
17814 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
17817 /* Do anything needed before RTL is emitted for each function. */
17819 arm_init_expanders (void)
17821 /* Arrange to initialize and mark the machine per-function status. */
17822 init_machine_status = arm_init_machine_status;
17824 /* This is to stop the combine pass optimizing away the alignment
17825 adjustment of va_arg. */
17826 /* ??? It is claimed that this should not be necessary. */
17828 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
17832 /* Like arm_compute_initial_elimination_offset. Simpler because there
17833 isn't an ABI specified frame pointer for Thumb. Instead, we set it
17834 to point at the base of the local variables after static stack
17835 space for a function has been allocated. */
17838 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17840 arm_stack_offsets *offsets;
17842 offsets = arm_get_frame_offsets ();
17846 case ARG_POINTER_REGNUM:
17849 case STACK_POINTER_REGNUM:
17850 return offsets->outgoing_args - offsets->saved_args;
17852 case FRAME_POINTER_REGNUM:
17853 return offsets->soft_frame - offsets->saved_args;
17855 case ARM_HARD_FRAME_POINTER_REGNUM:
17856 return offsets->saved_regs - offsets->saved_args;
17858 case THUMB_HARD_FRAME_POINTER_REGNUM:
17859 return offsets->locals_base - offsets->saved_args;
17862 gcc_unreachable ();
17866 case FRAME_POINTER_REGNUM:
17869 case STACK_POINTER_REGNUM:
17870 return offsets->outgoing_args - offsets->soft_frame;
17872 case ARM_HARD_FRAME_POINTER_REGNUM:
17873 return offsets->saved_regs - offsets->soft_frame;
17875 case THUMB_HARD_FRAME_POINTER_REGNUM:
17876 return offsets->locals_base - offsets->soft_frame;
17879 gcc_unreachable ();
17884 gcc_unreachable ();
17888 /* Generate the rest of a function's prologue. */
17890 thumb1_expand_prologue (void)
17894 HOST_WIDE_INT amount;
17895 arm_stack_offsets *offsets;
17896 unsigned long func_type;
17898 unsigned long live_regs_mask;
17900 func_type = arm_current_func_type ();
17902 /* Naked functions don't have prologues. */
17903 if (IS_NAKED (func_type))
17906 if (IS_INTERRUPT (func_type))
17908 error ("interrupt Service Routines cannot be coded in Thumb mode");
17912 offsets = arm_get_frame_offsets ();
17913 live_regs_mask = offsets->saved_regs_mask;
17914 /* Load the pic register before setting the frame pointer,
17915 so we can use r7 as a temporary work register. */
17916 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17917 arm_load_pic_register (live_regs_mask);
17919 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17920 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17921 stack_pointer_rtx);
17923 amount = offsets->outgoing_args - offsets->saved_regs;
17928 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17929 GEN_INT (- amount)));
17930 RTX_FRAME_RELATED_P (insn) = 1;
17936 /* The stack decrement is too big for an immediate value in a single
17937 insn. In theory we could issue multiple subtracts, but after
17938 three of them it becomes more space efficient to place the full
17939 value in the constant pool and load into a register. (Also the
17940 ARM debugger really likes to see only one stack decrement per
17941 function). So instead we look for a scratch register into which
17942 we can load the decrement, and then we subtract this from the
17943 stack pointer. Unfortunately on the thumb the only available
17944 scratch registers are the argument registers, and we cannot use
17945 these as they may hold arguments to the function. Instead we
17946 attempt to locate a call preserved register which is used by this
17947 function. If we can find one, then we know that it will have
17948 been pushed at the start of the prologue and so we can corrupt it. */
17950 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17951 if (live_regs_mask & (1 << regno))
17954 gcc_assert (regno <= LAST_LO_REGNUM);
17956 reg = gen_rtx_REG (SImode, regno);
17958 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17960 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17961 stack_pointer_rtx, reg));
17962 RTX_FRAME_RELATED_P (insn) = 1;
17963 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17964 plus_constant (stack_pointer_rtx,
17966 RTX_FRAME_RELATED_P (dwarf) = 1;
17967 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17971 if (frame_pointer_needed)
17972 thumb_set_frame_pointer (offsets);
17974 /* If we are profiling, make sure no instructions are scheduled before
17975 the call to mcount. Similarly if the user has requested no
17976 scheduling in the prolog. Similarly if we want non-call exceptions
17977 using the EABI unwinder, to prevent faulting instructions from being
17978 swapped with a stack adjustment. */
17979 if (crtl->profile || !TARGET_SCHED_PROLOG
17980 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17981 emit_insn (gen_blockage ());
17983 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17984 if (live_regs_mask & 0xff)
17985 cfun->machine->lr_save_eliminated = 0;
17990 thumb1_expand_epilogue (void)
17992 HOST_WIDE_INT amount;
17993 arm_stack_offsets *offsets;
17996 /* Naked functions don't have epilogues. */
17997 if (IS_NAKED (arm_current_func_type ()))
18000 offsets = arm_get_frame_offsets ();
18001 amount = offsets->outgoing_args - offsets->saved_regs;
18003 if (frame_pointer_needed)
18005 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
18006 amount = offsets->locals_base - offsets->saved_regs;
18009 gcc_assert (amount >= 0);
18013 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18014 GEN_INT (amount)));
18017 /* r3 is always free in the epilogue. */
18018 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
18020 emit_insn (gen_movsi (reg, GEN_INT (amount)));
18021 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
18025 /* Emit a USE (stack_pointer_rtx), so that
18026 the stack adjustment will not be deleted. */
18027 emit_insn (gen_prologue_use (stack_pointer_rtx));
18029 if (crtl->profile || !TARGET_SCHED_PROLOG)
18030 emit_insn (gen_blockage ());
18032 /* Emit a clobber for each register that will be restored in the epilogue,
18033 so that flow2 will get register lifetimes correct. */
18034 for (regno = 0; regno < 13; regno++)
18035 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
18036 emit_clobber (gen_rtx_REG (SImode, regno));
18038 if (! df_regs_ever_live_p (LR_REGNUM))
18039 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
18043 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
18045 arm_stack_offsets *offsets;
18046 unsigned long live_regs_mask = 0;
18047 unsigned long l_mask;
18048 unsigned high_regs_pushed = 0;
18049 int cfa_offset = 0;
18052 if (IS_NAKED (arm_current_func_type ()))
18055 if (is_called_in_ARM_mode (current_function_decl))
18059 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
18060 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
18062 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
18064 /* Generate code sequence to switch us into Thumb mode. */
18065 /* The .code 32 directive has already been emitted by
18066 ASM_DECLARE_FUNCTION_NAME. */
18067 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
18068 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
18070 /* Generate a label, so that the debugger will notice the
18071 change in instruction sets. This label is also used by
18072 the assembler to bypass the ARM code when this function
18073 is called from a Thumb encoded function elsewhere in the
18074 same file. Hence the definition of STUB_NAME here must
18075 agree with the definition in gas/config/tc-arm.c. */
18077 #define STUB_NAME ".real_start_of"
18079 fprintf (f, "\t.code\t16\n");
18081 if (arm_dllexport_name_p (name))
18082 name = arm_strip_name_encoding (name);
18084 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
18085 fprintf (f, "\t.thumb_func\n");
18086 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
18089 if (crtl->args.pretend_args_size)
18091 /* Output unwind directive for the stack adjustment. */
18092 if (ARM_EABI_UNWIND_TABLES)
18093 fprintf (f, "\t.pad #%d\n",
18094 crtl->args.pretend_args_size);
18096 if (cfun->machine->uses_anonymous_args)
18100 fprintf (f, "\tpush\t{");
18102 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
18104 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
18105 regno <= LAST_ARG_REGNUM;
18107 asm_fprintf (f, "%r%s", regno,
18108 regno == LAST_ARG_REGNUM ? "" : ", ");
18110 fprintf (f, "}\n");
18113 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
18114 SP_REGNUM, SP_REGNUM,
18115 crtl->args.pretend_args_size);
18117 /* We don't need to record the stores for unwinding (would it
18118 help the debugger any if we did?), but record the change in
18119 the stack pointer. */
18120 if (dwarf2out_do_frame ())
18122 char *l = dwarf2out_cfi_label (false);
18124 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
18125 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
18129 /* Get the registers we are going to push. */
18130 offsets = arm_get_frame_offsets ();
18131 live_regs_mask = offsets->saved_regs_mask;
18132 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
18133 l_mask = live_regs_mask & 0x40ff;
18134 /* Then count how many other high registers will need to be pushed. */
18135 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18137 if (TARGET_BACKTRACE)
18140 unsigned work_register;
18142 /* We have been asked to create a stack backtrace structure.
18143 The code looks like this:
18147 0 sub SP, #16 Reserve space for 4 registers.
18148 2 push {R7} Push low registers.
18149 4 add R7, SP, #20 Get the stack pointer before the push.
18150 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
18151 8 mov R7, PC Get hold of the start of this code plus 12.
18152 10 str R7, [SP, #16] Store it.
18153 12 mov R7, FP Get hold of the current frame pointer.
18154 14 str R7, [SP, #4] Store it.
18155 16 mov R7, LR Get hold of the current return address.
18156 18 str R7, [SP, #12] Store it.
18157 20 add R7, SP, #16 Point at the start of the backtrace structure.
18158 22 mov FP, R7 Put this value into the frame pointer. */
18160 work_register = thumb_find_work_register (live_regs_mask);
18162 if (ARM_EABI_UNWIND_TABLES)
18163 asm_fprintf (f, "\t.pad #16\n");
18166 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
18167 SP_REGNUM, SP_REGNUM);
18169 if (dwarf2out_do_frame ())
18171 char *l = dwarf2out_cfi_label (false);
18173 cfa_offset = cfa_offset + 16;
18174 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
18179 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
18180 offset = bit_count (l_mask) * UNITS_PER_WORD;
18185 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
18186 offset + 16 + crtl->args.pretend_args_size);
18188 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18191 /* Make sure that the instruction fetching the PC is in the right place
18192 to calculate "start of backtrace creation code + 12". */
18195 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
18196 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18198 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
18199 ARM_HARD_FRAME_POINTER_REGNUM);
18200 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18205 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
18206 ARM_HARD_FRAME_POINTER_REGNUM);
18207 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18209 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
18210 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18214 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
18215 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18217 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
18219 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
18220 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
18222 /* Optimization: If we are not pushing any low registers but we are going
18223 to push some high registers then delay our first push. This will just
18224 be a push of LR and we can combine it with the push of the first high register. */
18226 else if ((l_mask & 0xff) != 0
18227 || (high_regs_pushed == 0 && l_mask))
18228 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
18230 if (high_regs_pushed)
18232 unsigned pushable_regs;
18233 unsigned next_hi_reg;
18235 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
18236 if (live_regs_mask & (1 << next_hi_reg))
18239 pushable_regs = l_mask & 0xff;
18241 if (pushable_regs == 0)
18242 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
18244 while (high_regs_pushed > 0)
18246 unsigned long real_regs_mask = 0;
18248 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
18250 if (pushable_regs & (1 << regno))
18252 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
18254 high_regs_pushed --;
18255 real_regs_mask |= (1 << next_hi_reg);
18257 if (high_regs_pushed)
18259 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
18261 if (live_regs_mask & (1 << next_hi_reg))
18266 pushable_regs &= ~((1 << regno) - 1);
18272 /* If we had to find a work register and we have not yet
18273 saved the LR then add it to the list of regs to push. */
18274 if (l_mask == (1 << LR_REGNUM))
18276 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
18278 real_regs_mask | (1 << LR_REGNUM));
18282 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
18287 /* Handle the case of a double word load into a low register from
18288 a computed memory address. The computed address may involve a
18289 register which is overwritten by the load. */
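/* An illustrative sketch of the strategy (not from the original
   source): for a destination pair r0/r1 and an address of the form
   base + index, the sum is formed in r1 (the high destination), r0
   is then loaded from [r1, #0], and r1 is loaded last from [r1, #4],
   so the computed address is only overwritten by the final load.  */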
18291 thumb_load_double_from_address (rtx *operands)
18299 gcc_assert (GET_CODE (operands[0]) == REG);
18300 gcc_assert (GET_CODE (operands[1]) == MEM);
18302 /* Get the memory address. */
18303 addr = XEXP (operands[1], 0);
18305 /* Work out how the memory address is computed. */
18306 switch (GET_CODE (addr))
18309 operands[2] = adjust_address (operands[1], SImode, 4);
18311 if (REGNO (operands[0]) == REGNO (addr))
18313 output_asm_insn ("ldr\t%H0, %2", operands);
18314 output_asm_insn ("ldr\t%0, %1", operands);
18318 output_asm_insn ("ldr\t%0, %1", operands);
18319 output_asm_insn ("ldr\t%H0, %2", operands);
18324 /* Compute <address> + 4 for the high order load. */
18325 operands[2] = adjust_address (operands[1], SImode, 4);
18327 output_asm_insn ("ldr\t%0, %1", operands);
18328 output_asm_insn ("ldr\t%H0, %2", operands);
18332 arg1 = XEXP (addr, 0);
18333 arg2 = XEXP (addr, 1);
18335 if (CONSTANT_P (arg1))
18336 base = arg2, offset = arg1;
18338 base = arg1, offset = arg2;
18340 gcc_assert (GET_CODE (base) == REG);
18342 /* Catch the case of <address> = <reg> + <reg> */
18343 if (GET_CODE (offset) == REG)
18345 int reg_offset = REGNO (offset);
18346 int reg_base = REGNO (base);
18347 int reg_dest = REGNO (operands[0]);
18349 /* Add the base and offset registers together into the
18350 higher destination register. */
18351 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
18352 reg_dest + 1, reg_base, reg_offset);
18354 /* Load the lower destination register from the address in
18355 the higher destination register. */
18356 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
18357 reg_dest, reg_dest + 1);
18359 /* Load the higher destination register from its own address plus 4. */
18361 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
18362 reg_dest + 1, reg_dest + 1);
18366 /* Compute <address> + 4 for the high order load. */
18367 operands[2] = adjust_address (operands[1], SImode, 4);
18369 /* If the computed address is held in the low order register
18370 then load the high order register first, otherwise always
18371 load the low order register first. */
18372 if (REGNO (operands[0]) == REGNO (base))
18374 output_asm_insn ("ldr\t%H0, %2", operands);
18375 output_asm_insn ("ldr\t%0, %1", operands);
18379 output_asm_insn ("ldr\t%0, %1", operands);
18380 output_asm_insn ("ldr\t%H0, %2", operands);
18386 /* With no registers to worry about we can just load the value directly. */
18388 operands[2] = adjust_address (operands[1], SImode, 4);
18390 output_asm_insn ("ldr\t%H0, %2", operands);
18391 output_asm_insn ("ldr\t%0, %1", operands);
18395 gcc_unreachable ();
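/* A descriptive note for the function below (not in the original
   source): it emits ldmia/stmia pairs for a small block copy, first
   sorting the scratch register operands into ascending order, since
   the Thumb multiple load/store encodings require ascending register
   lists.  */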
18402 thumb_output_move_mem_multiple (int n, rtx *operands)
18409 if (REGNO (operands[4]) > REGNO (operands[5]))
18412 operands[4] = operands[5];
18415 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
18416 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
18420 if (REGNO (operands[4]) > REGNO (operands[5]))
18423 operands[4] = operands[5];
18426 if (REGNO (operands[5]) > REGNO (operands[6]))
18429 operands[5] = operands[6];
18432 if (REGNO (operands[4]) > REGNO (operands[5]))
18435 operands[4] = operands[5];
18439 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
18440 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
18444 gcc_unreachable ();
18450 /* Output a call-via instruction for thumb state. */
18452 thumb_call_via_reg (rtx reg)
18454 int regno = REGNO (reg);
18457 gcc_assert (regno < LR_REGNUM);
18459 /* If we are in the normal text section we can use a single instance
18460 per compilation unit. If we are doing function sections, then we need
18461 an entry per section, since we can't rely on reachability. */
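/* Either way the call itself is just "bl <label>"; for the shared
   text-section case the label is flushed by arm_file_end below as a
   single "bx rN" stub (a descriptive note, not in the original
   source).  */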
18462 if (in_section == text_section)
18464 thumb_call_reg_needed = 1;
18466 if (thumb_call_via_label[regno] == NULL)
18467 thumb_call_via_label[regno] = gen_label_rtx ();
18468 labelp = thumb_call_via_label + regno;
18472 if (cfun->machine->call_via[regno] == NULL)
18473 cfun->machine->call_via[regno] = gen_label_rtx ();
18474 labelp = cfun->machine->call_via + regno;
18477 output_asm_insn ("bl\t%a0", labelp);
18481 /* Routines for generating rtl. */
18483 thumb_expand_movmemqi (rtx *operands)
18485 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
18486 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
18487 HOST_WIDE_INT len = INTVAL (operands[2]);
18488 HOST_WIDE_INT offset = 0;
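/* A descriptive note (not in the original source): the copy is done
   in the largest chunks available, 12- and 8-byte ldmia/stmia blocks
   via the movmem12b/movmem8b patterns, with the tail finished by
   word, halfword and byte moves.  */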
18492 emit_insn (gen_movmem12b (out, in, out, in));
18498 emit_insn (gen_movmem8b (out, in, out, in));
18504 rtx reg = gen_reg_rtx (SImode);
18505 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
18506 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
18513 rtx reg = gen_reg_rtx (HImode);
18514 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
18515 plus_constant (in, offset))));
18516 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
18524 rtx reg = gen_reg_rtx (QImode);
18525 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
18526 plus_constant (in, offset))));
18527 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
18533 thumb_reload_out_hi (rtx *operands)
18535 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
18538 /* Handle reading a half-word from memory during reload. */
18540 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
18542 gcc_unreachable ();
18545 /* Return the length of a function name prefix
18546 that starts with the character C. */
18548 arm_get_strip_length (int c)
18552 ARM_NAME_ENCODING_LENGTHS
18557 /* Return a pointer to a function's name with any
18558 and all prefix encodings stripped from it. */
18560 arm_strip_name_encoding (const char *name)
18564 while ((skip = arm_get_strip_length (* name)))
18570 /* If there is a '*' anywhere in the name's prefix, then
18571 emit the stripped name verbatim, otherwise prepend an
18572 underscore if leading underscores are being used. */
18574 arm_asm_output_labelref (FILE *stream, const char *name)
18579 while ((skip = arm_get_strip_length (* name)))
18581 verbatim |= (*name == '*');
18586 fputs (name, stream);
18588 asm_fprintf (stream, "%U%s", name);
18592 arm_file_start (void)
18596 if (TARGET_UNIFIED_ASM)
18597 asm_fprintf (asm_out_file, "\t.syntax unified\n");
18601 const char *fpu_name;
18602 if (arm_select[0].string)
18603 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
18604 else if (arm_select[1].string)
18605 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
18607 asm_fprintf (asm_out_file, "\t.cpu %s\n",
18608 all_cores[arm_default_cpu].name);
18610 if (TARGET_SOFT_FLOAT)
18613 fpu_name = "softvfp";
18615 fpu_name = "softfpa";
18619 int set_float_abi_attributes = 0;
18620 switch (arm_fpu_arch)
18625 case FPUTYPE_FPA_EMU2:
18628 case FPUTYPE_FPA_EMU3:
18631 case FPUTYPE_MAVERICK:
18632 fpu_name = "maverick";
18636 set_float_abi_attributes = 1;
18638 case FPUTYPE_VFP3D16:
18639 fpu_name = "vfpv3-d16";
18640 set_float_abi_attributes = 1;
18643 fpu_name = "vfpv3";
18644 set_float_abi_attributes = 1;
18648 set_float_abi_attributes = 1;
18650 case FPUTYPE_NEON_FP16:
18651 fpu_name = "neon-fp16";
18652 set_float_abi_attributes = 1;
18657 if (set_float_abi_attributes)
18659 if (TARGET_HARD_FLOAT)
18660 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
18661 if (TARGET_HARD_FLOAT_ABI)
18662 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
18665 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
18667 /* Some of these attributes only apply when the corresponding features
18668 are used. However we don't have any easy way of figuring this out.
18669 Conservatively record the setting that would have been used. */
18671 /* Tag_ABI_FP_rounding. */
18672 if (flag_rounding_math)
18673 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
18674 if (!flag_unsafe_math_optimizations)
18676 /* Tag_ABI_FP_denormal. */
18677 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
18678 /* Tag_ABI_FP_exceptions. */
18679 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
18681 /* Tag_ABI_FP_user_exceptions. */
18682 if (flag_signaling_nans)
18683 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
18684 /* Tag_ABI_FP_number_model. */
18685 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
18686 flag_finite_math_only ? 1 : 3);
18688 /* Tag_ABI_align8_needed. */
18689 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
18690 /* Tag_ABI_align8_preserved. */
18691 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
18692 /* Tag_ABI_enum_size. */
18693 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
18694 flag_short_enums ? 1 : 2);
18696 /* Tag_ABI_optimization_goals. */
18699 else if (optimize >= 2)
18705 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
18707 /* Tag_ABI_FP_16bit_format. */
18708 if (arm_fp16_format)
18709 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
18710 (int)arm_fp16_format);
18712 if (arm_lang_output_object_attributes_hook)
18713 arm_lang_output_object_attributes_hook();
18715 default_file_start();
18719 arm_file_end (void)
18723 if (NEED_INDICATE_EXEC_STACK)
18724 /* Add .note.GNU-stack. */
18725 file_end_indicate_exec_stack ();
18727 if (! thumb_call_reg_needed)
18730 switch_to_section (text_section);
18731 asm_fprintf (asm_out_file, "\t.code 16\n");
18732 ASM_OUTPUT_ALIGN (asm_out_file, 1);
18734 for (regno = 0; regno < LR_REGNUM; regno++)
18736 rtx label = thumb_call_via_label[regno];
18740 targetm.asm_out.internal_label (asm_out_file, "L",
18741 CODE_LABEL_NUMBER (label));
18742 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18748 /* Symbols in the text segment can be accessed without indirecting via the
18749 constant pool; it may take an extra binary operation, but this is still
18750 faster than indirecting via memory. Don't do this when not optimizing,
18751 since we won't be calculating all of the offsets necessary to do this correctly. */
18755 arm_encode_section_info (tree decl, rtx rtl, int first)
18757 if (optimize > 0 && TREE_CONSTANT (decl))
18758 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
18760 default_encode_section_info (decl, rtl, first);
18762 #endif /* !ARM_PE */
18765 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
18767 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
18768 && !strcmp (prefix, "L"))
18770 arm_ccfsm_state = 0;
18771 arm_target_insn = NULL;
18773 default_internal_label (stream, prefix, labelno);
18776 /* Output code to add DELTA to the first argument, and then jump
18777 to FUNCTION. Used for C++ multiple inheritance. */
18779 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
18780 HOST_WIDE_INT delta,
18781 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
18784 static int thunk_label = 0;
18787 int mi_delta = delta;
18788 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
18790 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
18793 mi_delta = - mi_delta;
18797 int labelno = thunk_label++;
18798 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
18799 /* Thunks are entered in ARM mode when available. */
18800 if (TARGET_THUMB1_ONLY)
18802 /* push r3 so we can use it as a temporary. */
18803 /* TODO: Omit this save if r3 is not used. */
18804 fputs ("\tpush {r3}\n", file);
18805 fputs ("\tldr\tr3, ", file);
18809 fputs ("\tldr\tr12, ", file);
18811 assemble_name (file, label);
18812 fputc ('\n', file);
18815 /* If we are generating PIC, the ldr instruction below loads
18816 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
18817 the address of the add + 8, so we have:
18819 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
18820 = target + 1.
18822 Note that we have "+ 1" because some versions of GNU ld
18823 don't set the low bit of the result for R_ARM_REL32
18824 relocations against thumb function symbols.
18825 On ARMv6M this is +4, not +8. */
18826 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
18827 assemble_name (file, labelpc);
18828 fputs (":\n", file);
18829 if (TARGET_THUMB1_ONLY)
18831 /* This is 2 insns after the start of the thunk, so we know it
18832 is 4-byte aligned. */
18833 fputs ("\tadd\tr3, pc, r3\n", file);
18834 fputs ("\tmov r12, r3\n", file);
18837 fputs ("\tadd\tr12, pc, r12\n", file);
18839 else if (TARGET_THUMB1_ONLY)
18840 fputs ("\tmov r12, r3\n", file);
18842 if (TARGET_THUMB1_ONLY)
18844 if (mi_delta > 255)
18846 fputs ("\tldr\tr3, ", file);
18847 assemble_name (file, label);
18848 fputs ("+4\n", file);
18849 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
18850 mi_op, this_regno, this_regno);
18852 else if (mi_delta != 0)
18854 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18855 mi_op, this_regno, this_regno,
18861 /* TODO: Use movw/movt for large constants when available. */
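/* A sketch of the chunking loop below (illustrative, not from the
   original source): a delta of 0x12345 is emitted as three adds of
   #0x45, #0x2300 and #0x10000, each clearing one shifted 8-bit
   field of the remaining delta.  */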
18862 while (mi_delta != 0)
18864 if ((mi_delta & (3 << shift)) == 0)
18868 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18869 mi_op, this_regno, this_regno,
18870 mi_delta & (0xff << shift));
18871 mi_delta &= ~(0xff << shift);
18878 if (TARGET_THUMB1_ONLY)
18879 fputs ("\tpop\t{r3}\n", file);
18881 fprintf (file, "\tbx\tr12\n");
18882 ASM_OUTPUT_ALIGN (file, 2);
18883 assemble_name (file, label);
18884 fputs (":\n", file);
18887 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
18888 rtx tem = XEXP (DECL_RTL (function), 0);
18889 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
18890 tem = gen_rtx_MINUS (GET_MODE (tem),
18892 gen_rtx_SYMBOL_REF (Pmode,
18893 ggc_strdup (labelpc)));
18894 assemble_integer (tem, 4, BITS_PER_WORD, 1);
18897 /* Output ".word .LTHUNKn". */
18898 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
18900 if (TARGET_THUMB1_ONLY && mi_delta > 255)
18901 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
18905 fputs ("\tb\t", file);
18906 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
18907 if (NEED_PLT_RELOC)
18908 fputs ("(PLT)", file);
18909 fputc ('\n', file);
18914 arm_emit_vector_const (FILE *file, rtx x)
18917 const char * pattern;
18919 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18921 switch (GET_MODE (x))
18923 case V2SImode: pattern = "%08x"; break;
18924 case V4HImode: pattern = "%04x"; break;
18925 case V8QImode: pattern = "%02x"; break;
18926 default: gcc_unreachable ();
18929 fprintf (file, "0x");
18930 for (i = CONST_VECTOR_NUNITS (x); i--;)
18934 element = CONST_VECTOR_ELT (x, i);
18935 fprintf (file, pattern, INTVAL (element));
18941 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
18942 HFmode constant pool entries are actually loaded with ldr. */
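/* For example (an illustrative note, not from the original source):
   on a little-endian target the constant 1.0, which is 0x3c00 in
   IEEE half precision, is emitted as the two value bytes followed by
   two bytes of zero padding.  */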
18944 arm_emit_fp16_const (rtx c)
18949 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
18950 bits = real_to_target (NULL, &r, HFmode);
18951 if (WORDS_BIG_ENDIAN)
18952 assemble_zeros (2);
18953 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
18954 if (!WORDS_BIG_ENDIAN)
18955 assemble_zeros (2);
18959 arm_output_load_gr (rtx *operands)
18966 if (GET_CODE (operands [1]) != MEM
18967 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18968 || GET_CODE (reg = XEXP (sum, 0)) != REG
18969 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18970 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18971 return "wldrw%?\t%0, %1";
18973 /* Fix up an out-of-range load of a GR register. */
18974 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18975 wcgr = operands[0];
18977 output_asm_insn ("ldr%?\t%0, %1", operands);
18979 operands[0] = wcgr;
18981 output_asm_insn ("tmcr%?\t%0, %1", operands);
18982 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18987 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18989 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18990 named arg and all anonymous args onto the stack.
18991 XXX I know the prologue shouldn't be pushing registers, but it is faster that way. */
18995 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18996 enum machine_mode mode,
18999 int second_time ATTRIBUTE_UNUSED)
19001 int nregs = cum->nregs;
19003 && ARM_DOUBLEWORD_ALIGN
19004 && arm_needs_doubleword_align (mode, type))
19007 cfun->machine->uses_anonymous_args = 1;
19008 if (nregs < NUM_ARG_REGS)
19009 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
19012 /* Return nonzero if the CONSUMER instruction (a store) does not need
19013 PRODUCER's value to calculate the address. */
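/* For example (illustrative, not from the original source): if the
   producer sets r1, then for "str r2, [r1, #4]" the address needs r1
   early and we return 0, while for "str r1, [r3]" only the stored
   value depends on the producer and we return nonzero.  */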
19016 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
19018 rtx value = PATTERN (producer);
19019 rtx addr = PATTERN (consumer);
19021 if (GET_CODE (value) == COND_EXEC)
19022 value = COND_EXEC_CODE (value);
19023 if (GET_CODE (value) == PARALLEL)
19024 value = XVECEXP (value, 0, 0);
19025 value = XEXP (value, 0);
19026 if (GET_CODE (addr) == COND_EXEC)
19027 addr = COND_EXEC_CODE (addr);
19028 if (GET_CODE (addr) == PARALLEL)
19029 addr = XVECEXP (addr, 0, 0);
19030 addr = XEXP (addr, 0);
19032 return !reg_overlap_mentioned_p (value, addr);
19035 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
19036 have an early register shift value or amount dependency on the
19037 result of PRODUCER. */
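/* For example (illustrative, not from the original source): if the
   producer sets r2 and the consumer is "add r0, r1, r2, lsl #4",
   the shifted operand depends on the producer and we return 0; if
   the producer only sets r1, the shift is independent and we return
   nonzero.  */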
19040 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
19042 rtx value = PATTERN (producer);
19043 rtx op = PATTERN (consumer);
19046 if (GET_CODE (value) == COND_EXEC)
19047 value = COND_EXEC_CODE (value);
19048 if (GET_CODE (value) == PARALLEL)
19049 value = XVECEXP (value, 0, 0);
19050 value = XEXP (value, 0);
19051 if (GET_CODE (op) == COND_EXEC)
19052 op = COND_EXEC_CODE (op);
19053 if (GET_CODE (op) == PARALLEL)
19054 op = XVECEXP (op, 0, 0);
19057 early_op = XEXP (op, 0);
19058 /* This is either an actual independent shift, or a shift applied to
19059 the first operand of another operation. We want the whole shift operation. */
19061 if (GET_CODE (early_op) == REG)
19064 return !reg_overlap_mentioned_p (value, early_op);
19067 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
19068 have an early register shift value dependency on the result of
19072 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
19074 rtx value = PATTERN (producer);
19075 rtx op = PATTERN (consumer);
19078 if (GET_CODE (value) == COND_EXEC)
19079 value = COND_EXEC_CODE (value);
19080 if (GET_CODE (value) == PARALLEL)
19081 value = XVECEXP (value, 0, 0);
19082 value = XEXP (value, 0);
19083 if (GET_CODE (op) == COND_EXEC)
19084 op = COND_EXEC_CODE (op);
19085 if (GET_CODE (op) == PARALLEL)
19086 op = XVECEXP (op, 0, 0);
19089 early_op = XEXP (op, 0);
19091 /* This is either an actual independent shift, or a shift applied to
19092 the first operand of another operation. We want the value being
19093 shifted, in either case. */
19094 if (GET_CODE (early_op) != REG)
19095 early_op = XEXP (early_op, 0);
19097 return !reg_overlap_mentioned_p (value, early_op);
19100 /* Return nonzero if the CONSUMER (a mul or mac op) does not
19101 have an early register mult dependency on the result of
19105 arm_no_early_mul_dep (rtx producer, rtx consumer)
19107 rtx value = PATTERN (producer);
19108 rtx op = PATTERN (consumer);
19110 if (GET_CODE (value) == COND_EXEC)
19111 value = COND_EXEC_CODE (value);
19112 if (GET_CODE (value) == PARALLEL)
19113 value = XVECEXP (value, 0, 0);
19114 value = XEXP (value, 0);
19115 if (GET_CODE (op) == COND_EXEC)
19116 op = COND_EXEC_CODE (op);
19117 if (GET_CODE (op) == PARALLEL)
19118 op = XVECEXP (op, 0, 0);
19121 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
19123 if (GET_CODE (XEXP (op, 0)) == MULT)
19124 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
19126 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
19132 /* We can't rely on the caller doing the proper promotion when
19133 using APCS or ATPCS. */
19136 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
19138 return !TARGET_AAPCS_BASED;
19142 /* AAPCS based ABIs use short enums by default. */
19145 arm_default_short_enums (void)
19147 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
19151 /* AAPCS requires that anonymous bitfields affect structure alignment. */
19154 arm_align_anon_bitfield (void)
19156 return TARGET_AAPCS_BASED;
19160 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
19163 arm_cxx_guard_type (void)
19165 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
19168 /* Return non-zero if the consumer (a multiply-accumulate instruction)
19169 has an accumulator dependency on the result of the producer (a
19170 multiplication instruction) and no other dependency on that result. */
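/* For instance (an illustrative sketch, not from the original
   source): if the producer is (set (reg r4) (mult (reg r1) (reg r2)))
   and the consumer is (set (reg r5) (plus (mult (reg r2) (reg r3))
   (reg r4))), the multiply result feeds only the accumulator operand,
   so we return nonzero.  */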
19172 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
19174 rtx mul = PATTERN (producer);
19175 rtx mac = PATTERN (consumer);
19177 rtx mac_op0, mac_op1, mac_acc;
19179 if (GET_CODE (mul) == COND_EXEC)
19180 mul = COND_EXEC_CODE (mul);
19181 if (GET_CODE (mac) == COND_EXEC)
19182 mac = COND_EXEC_CODE (mac);
19184 /* Check that mul is of the form (set (...) (mult ...))
19185 and mla is of the form (set (...) (plus (mult ...) (...))). */
19186 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
19187 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
19188 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
19191 mul_result = XEXP (mul, 0);
19192 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
19193 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
19194 mac_acc = XEXP (XEXP (mac, 1), 1);
19196 return (reg_overlap_mentioned_p (mul_result, mac_acc)
19197 && !reg_overlap_mentioned_p (mul_result, mac_op0)
19198 && !reg_overlap_mentioned_p (mul_result, mac_op1));
19202 /* The EABI says test the least significant bit of a guard variable. */
19205 arm_cxx_guard_mask_bit (void)
19207 return TARGET_AAPCS_BASED;
19211 /* The EABI specifies that all array cookies are 8 bytes long. */
19214 arm_get_cookie_size (tree type)
19218 if (!TARGET_AAPCS_BASED)
19219 return default_cxx_get_cookie_size (type);
19221 size = build_int_cst (sizetype, 8);
19226 /* The EABI says that array cookies should also contain the element size. */
19229 arm_cookie_has_size (void)
19231 return TARGET_AAPCS_BASED;
19235 /* The EABI says constructors and destructors should return a pointer to
19236 the object constructed/destroyed. */
19239 arm_cxx_cdtor_returns_this (void)
19241 return TARGET_AAPCS_BASED;
19244 /* The EABI says that an inline function may never be the key method. */
19248 arm_cxx_key_method_may_be_inline (void)
19250 return !TARGET_AAPCS_BASED;
19254 arm_cxx_determine_class_data_visibility (tree decl)
19256 if (!TARGET_AAPCS_BASED
19257 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
19260 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
19261 is exported. However, on systems without dynamic vague linkage,
19262 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
19263 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
19264 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
19266 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
19267 DECL_VISIBILITY_SPECIFIED (decl) = 1;
19271 arm_cxx_class_data_always_comdat (void)
19273 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
19274 vague linkage if the class has no key function. */
19275 return !TARGET_AAPCS_BASED;
19279 /* The EABI says __aeabi_atexit should be used to register static
19283 arm_cxx_use_aeabi_atexit (void)
19285 return TARGET_AAPCS_BASED;
19290 arm_set_return_address (rtx source, rtx scratch)
19292 arm_stack_offsets *offsets;
19293 HOST_WIDE_INT delta;
19295 unsigned long saved_regs;
19297 offsets = arm_get_frame_offsets ();
19298 saved_regs = offsets->saved_regs_mask;
19300 if ((saved_regs & (1 << LR_REGNUM)) == 0)
19301 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19304 if (frame_pointer_needed)
19305 addr = plus_constant (hard_frame_pointer_rtx, -4);
19308 /* LR will be the first saved register. */
19309 delta = offsets->outgoing_args - (offsets->frame + 4);
19314 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
19315 GEN_INT (delta & ~4095)));
19320 addr = stack_pointer_rtx;
19322 addr = plus_constant (addr, delta);
19324 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19330 thumb_set_return_address (rtx source, rtx scratch)
19332 arm_stack_offsets *offsets;
19333 HOST_WIDE_INT delta;
19334 HOST_WIDE_INT limit;
19337 unsigned long mask;
19341 offsets = arm_get_frame_offsets ();
19342 mask = offsets->saved_regs_mask;
19343 if (mask & (1 << LR_REGNUM))
19346 /* Find the saved regs. */
19347 if (frame_pointer_needed)
19349 delta = offsets->soft_frame - offsets->saved_args;
19350 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
19356 delta = offsets->outgoing_args - offsets->saved_args;
19359 /* Allow for the stack frame. */
19360 if (TARGET_THUMB1 && TARGET_BACKTRACE)
19362 /* The link register is always the first saved register. */
19365 /* Construct the address. */
19366 addr = gen_rtx_REG (SImode, reg);
19369 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
19370 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
19374 addr = plus_constant (addr, delta);
19376 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19379 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19382 /* Implements target hook vector_mode_supported_p. */
19384 arm_vector_mode_supported_p (enum machine_mode mode)
19386 /* Neon also supports V2SImode, etc. listed in the clause below. */
19387 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
19388 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
19391 if ((mode == V2SImode)
19392 || (mode == V4HImode)
19393 || (mode == V8QImode))
19399 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
19400 ARM insns and therefore guarantee that the shift count is modulo 256.
19401 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
19402 guarantee no particular behavior for out-of-range counts. */
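/* For instance (an illustrative note, not from the original source):
   an SImode shift by 260 behaves exactly like a shift by 4, hence
   the mask of 255 below, whereas DImode makes no such promise and
   gets a mask of 0.  */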
19404 static unsigned HOST_WIDE_INT
19405 arm_shift_truncation_mask (enum machine_mode mode)
19407 return mode == SImode ? 255 : 0;
19411 /* Map internal gcc register numbers to DWARF2 register numbers. */
19414 arm_dbx_register_number (unsigned int regno)
19419 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
19420 compatibility. The EABI defines them as registers 96-103. */
19421 if (IS_FPA_REGNUM (regno))
19422 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
19424 /* FIXME: VFPv3 register numbering. */
19425 if (IS_VFP_REGNUM (regno))
19426 return 64 + regno - FIRST_VFP_REGNUM;
19428 if (IS_IWMMXT_GR_REGNUM (regno))
19429 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
19431 if (IS_IWMMXT_REGNUM (regno))
19432 return 112 + regno - FIRST_IWMMXT_REGNUM;
19434 gcc_unreachable ();
19438 #ifdef TARGET_UNWIND_INFO
19439 /* Emit unwind directives for a store-multiple instruction or stack pointer
19440 push during alignment.
19441 These should only ever be generated by the function prologue code, so
19442 expect them to have a particular form. */
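/* For example (an illustrative sketch, not from the original
   source): a prologue "push {r4, r5, lr}" arrives as a PARALLEL
   whose first SET drops the stack pointer by 12 and whose remaining
   SETs store the three registers; it is annotated as
   ".save {r4, r5, lr}".  */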
19445 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
19448 HOST_WIDE_INT offset;
19449 HOST_WIDE_INT nregs;
19455 e = XVECEXP (p, 0, 0);
19456 if (GET_CODE (e) != SET)
19459 /* First insn will adjust the stack pointer. */
19460 if (GET_CODE (e) != SET
19461 || GET_CODE (XEXP (e, 0)) != REG
19462 || REGNO (XEXP (e, 0)) != SP_REGNUM
19463 || GET_CODE (XEXP (e, 1)) != PLUS)
19466 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
19467 nregs = XVECLEN (p, 0) - 1;
19469 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
19472 /* The function prologue may also push pc, but not annotate it as it is
19473 never restored. We turn this into a stack pointer adjustment. */
19474 if (nregs * 4 == offset - 4)
19476 fprintf (asm_out_file, "\t.pad #4\n");
19480 fprintf (asm_out_file, "\t.save {");
19482 else if (IS_VFP_REGNUM (reg))
19485 fprintf (asm_out_file, "\t.vsave {");
19487 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
19489 /* FPA registers are done differently. */
19490 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
19494 /* Unknown register type. */
19497 /* If the stack increment doesn't match the size of the saved registers,
19498 something has gone horribly wrong. */
19499 if (offset != nregs * reg_size)
19504 /* The remaining insns will describe the stores. */
19505 for (i = 1; i <= nregs; i++)
19507 /* Expect (set (mem <addr>) (reg)).
19508 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
19509 e = XVECEXP (p, 0, i);
19510 if (GET_CODE (e) != SET
19511 || GET_CODE (XEXP (e, 0)) != MEM
19512 || GET_CODE (XEXP (e, 1)) != REG)
19515 reg = REGNO (XEXP (e, 1));
19520 fprintf (asm_out_file, ", ");
19521 /* We can't use %r for vfp because we need to use the
19522 double precision register names. */
19523 if (IS_VFP_REGNUM (reg))
19524 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
19526 asm_fprintf (asm_out_file, "%r", reg);
19528 #ifdef ENABLE_CHECKING
19529 /* Check that the addresses are consecutive. */
19530 e = XEXP (XEXP (e, 0), 0);
19531 if (GET_CODE (e) == PLUS)
19533 offset += reg_size;
19534 if (GET_CODE (XEXP (e, 0)) != REG
19535 || REGNO (XEXP (e, 0)) != SP_REGNUM
19536 || GET_CODE (XEXP (e, 1)) != CONST_INT
19537 || offset != INTVAL (XEXP (e, 1)))
19541 || GET_CODE (e) != REG
19542 || REGNO (e) != SP_REGNUM)
19546 fprintf (asm_out_file, "}\n");
19549 /* Emit unwind directives for a SET. */
19552 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
19560 switch (GET_CODE (e0))
19563 /* Pushing a single register. */
19564 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
19565 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
19566 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
19569 asm_fprintf (asm_out_file, "\t.save ");
19570 if (IS_VFP_REGNUM (REGNO (e1)))
19571 asm_fprintf (asm_out_file, "{d%d}\n",
19572 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
19574 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
19578 if (REGNO (e0) == SP_REGNUM)
19580 /* A stack increment. */
19581 if (GET_CODE (e1) != PLUS
19582 || GET_CODE (XEXP (e1, 0)) != REG
19583 || REGNO (XEXP (e1, 0)) != SP_REGNUM
19584 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19587 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
19588 -INTVAL (XEXP (e1, 1)));
19590 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
19592 HOST_WIDE_INT offset;
19594 if (GET_CODE (e1) == PLUS)
19596 if (GET_CODE (XEXP (e1, 0)) != REG
19597 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19599 reg = REGNO (XEXP (e1, 0));
19600 offset = INTVAL (XEXP (e1, 1));
19601 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
19602 HARD_FRAME_POINTER_REGNUM, reg,
19603 INTVAL (XEXP (e1, 1)));
19605 else if (GET_CODE (e1) == REG)
19608 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
19609 HARD_FRAME_POINTER_REGNUM, reg);
19614 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
19616 /* Move from sp to reg. */
19617 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
19619 else if (GET_CODE (e1) == PLUS
19620 && GET_CODE (XEXP (e1, 0)) == REG
19621 && REGNO (XEXP (e1, 0)) == SP_REGNUM
19622 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
19624 /* Set reg to offset from sp. */
19625 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
19626 REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
19628 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
19630 /* Stack pointer save before alignment. */
19632 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
19645 /* Emit unwind directives for the given insn. */
19648 arm_unwind_emit (FILE * asm_out_file, rtx insn)
19652 if (!ARM_EABI_UNWIND_TABLES)
19655 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19656 && (TREE_NOTHROW (current_function_decl)
19657 || crtl->all_throwers_are_sibcalls))
19660 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
19663 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
19665 pat = XEXP (pat, 0);
19667 pat = PATTERN (insn);
19669 switch (GET_CODE (pat))
19672 arm_unwind_emit_set (asm_out_file, pat);
19676 /* Store multiple. */
19677 arm_unwind_emit_sequence (asm_out_file, pat);
19686 /* Output a reference from a function exception table to the type_info
19687 object X. The EABI specifies that the symbol should be relocated by
19688 an R_ARM_TARGET2 relocation. */
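/* For example (an illustrative note, not from the original source):
   a reference to the type_info object for int would be emitted as
   ".word _ZTIi(TARGET2)".  */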
19691 arm_output_ttype (rtx x)
19693 fputs ("\t.word\t", asm_out_file);
19694 output_addr_const (asm_out_file, x);
19695 /* Use special relocations for symbol references. */
19696 if (GET_CODE (x) != CONST_INT)
19697 fputs ("(TARGET2)", asm_out_file);
19698 fputc ('\n', asm_out_file);
19702 #endif /* TARGET_UNWIND_INFO */
19705 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
19706 stack alignment. */
19709 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
19711 rtx unspec = SET_SRC (pattern);
19712 gcc_assert (GET_CODE (unspec) == UNSPEC);
19716 case UNSPEC_STACK_ALIGN:
19717 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
19718 put anything on the stack, so hopefully it won't matter.
19719 CFA = SP will be correct after alignment. */
19720 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
19721 SET_DEST (pattern));
19724 gcc_unreachable ();
19729 /* Output unwind directives for the start/end of a function. */
19732 arm_output_fn_unwind (FILE * f, bool prologue)
19734 if (!ARM_EABI_UNWIND_TABLES)
19738 fputs ("\t.fnstart\n", f);
19741 /* If this function will never be unwound, then mark it as such.
19742 The same condition is used in arm_unwind_emit to suppress
19743 the frame annotations. */
19744 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19745 && (TREE_NOTHROW (current_function_decl)
19746 || crtl->all_throwers_are_sibcalls))
19747 fputs("\t.cantunwind\n", f);
19749 fputs ("\t.fnend\n", f);
19754 arm_emit_tls_decoration (FILE *fp, rtx x)
19756 enum tls_reloc reloc;
19759 val = XVECEXP (x, 0, 0);
19760 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
19762 output_addr_const (fp, val);
19767 fputs ("(tlsgd)", fp);
19770 fputs ("(tlsldm)", fp);
19773 fputs ("(tlsldo)", fp);
19776 fputs ("(gottpoff)", fp);
19779 fputs ("(tpoff)", fp);
19782 gcc_unreachable ();
19790 fputs (" + (. - ", fp);
19791 output_addr_const (fp, XVECEXP (x, 0, 2));
19793 output_addr_const (fp, XVECEXP (x, 0, 3));
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
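/* Illustrative sketch, not from the original source: for a TLS variable
   "x" this hook emits ".word x(tlsldo)", i.e. the offset of x from the
   start of its module's TLS block, which is the form DWARF debug info
   requires.  */
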
bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
	fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
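/* Illustrative sketch, not from the original source: a PIC label unspec
   with label number 3 prints as the internal label ".LPIC3" (the exact
   spelling comes from ASM_GENERATE_INTERNAL_LABEL), and a GOT-offset
   unspec for symbol "x" prints as "_GLOBAL_OFFSET_TABLE_+.-(x)" when
   GOT_PCREL is set.  */
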
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op(operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT(val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
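/* Illustrative sketch, not from the original source: under unified asm,
   a shift insn computing "r0 = r1 << 2" makes shift_op return "lsl", so
   the template built above is "lsl%?\t%0, %1, %2" (with %. or %! instead
   of %? for SET_FLAGS 1 or 2); output_asm_insn then prints something
   like "lsl r0, r1, #2" after arm_print_operand expands the punctuation.  */
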
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));
  addr_diff_vec_flags flags;

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  flags = ADDR_DIFF_VEC_FLAGS (diff_vec);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
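/* Illustrative sketch, not from the original source: for a byte-wide
   unsigned table this emits "bl __gnu_thumb1_case_uqi".  The libgcc
   helper loads table[index] from the dispatch table that follows the
   call, scales it, and adjusts the return address so that execution
   resumes at the selected case label.  */
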
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
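/* Illustrative sketch, not from the original source: a QImode dispatch
   prints roughly as

	cmp	r0, #N		@ index vs. number of cases
	bhi	.Ldefault
	tbb	[pc, r0]

   where tbb fetches a byte from the table at pc and branches forward by
   twice its value (tbh does the same with halfword entries).  */
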
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;
static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
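/* Illustrative sketch, not from the original source: arm_mangle_type
   below maps a V8QImode vector of __builtin_neon_qi elements (the type
   underlying int8x8_t) to "15__simd64_int8_t", so a C++ function
   "void f (int8x8_t)" mangles as "_Z1f15__simd64_int8_t".  */
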
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */
void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;

  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}

#include "gt-arm.h"