1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
35 #include "insn-config.h"
36 #include "conditions.h"
38 #include "insn-attr.h"
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
58 /* Forward definitions of types. */
59 typedef struct minipool_node Mnode;
60 typedef struct minipool_fixup Mfix;
62 EXPORTED_CONST struct attribute_spec arm_attribute_table[];
64 void (*arm_lang_output_object_attributes_hook)(void);
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
88 static arm_cc get_arm_condition_code (rtx);
89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
90 static rtx is_jump_table (rtx);
91 static const char *output_multi_immediate (rtx *, const char *, const char *,
93 static const char *shift_op (rtx, HOST_WIDE_INT *);
94 static struct machine_function *arm_init_machine_status (void);
95 static void thumb_exit (FILE *, int);
96 static rtx is_jump_table (rtx);
97 static HOST_WIDE_INT get_jump_table_size (rtx);
98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
99 static Mnode *add_minipool_forward_ref (Mfix *);
100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
101 static Mnode *add_minipool_backward_ref (Mfix *);
102 static void assign_minipool_offsets (Mfix *);
103 static void arm_print_value (FILE *, rtx);
104 static void dump_minipool (rtx);
105 static int arm_barrier_cost (rtx);
106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
107 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
110 static void arm_reorg (void);
111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
112 static unsigned long arm_compute_save_reg0_reg12_mask (void);
113 static unsigned long arm_compute_save_reg_mask (void);
114 static unsigned long arm_isr_value (tree);
115 static unsigned long arm_compute_func_type (void);
116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
118 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
119 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
121 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
122 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
123 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static int arm_comp_type_attributes (const_tree, const_tree);
125 static void arm_set_default_type_attributes (tree);
126 static int arm_adjust_cost (rtx, rtx, rtx, int);
127 static int count_insns_for_constant (HOST_WIDE_INT, int);
128 static int arm_get_strip_length (int);
129 static bool arm_function_ok_for_sibcall (tree, tree);
130 static void arm_internal_label (FILE *, const char *, unsigned long);
131 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
133 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
134 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
135 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
136 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
137 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
138 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
139 static bool arm_rtx_costs (rtx, int, int, int *, bool);
140 static int arm_address_cost (rtx, bool);
141 static bool arm_memory_load_p (rtx);
142 static bool arm_cirrus_insn_p (rtx);
143 static void cirrus_reorg (rtx);
144 static void arm_init_builtins (void);
145 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
146 static void arm_init_iwmmxt_builtins (void);
147 static rtx safe_vector_operand (rtx, enum machine_mode);
148 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
149 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
150 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
151 static void emit_constant_insn (rtx cond, rtx pattern);
152 static rtx emit_set_insn (rtx, rtx);
153 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
156 #ifdef OBJECT_FORMAT_ELF
157 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
158 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
161 static void arm_encode_section_info (tree, rtx, int);
164 static void arm_file_end (void);
165 static void arm_file_start (void);
167 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
169 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
170 enum machine_mode, const_tree, bool);
171 static bool arm_promote_prototypes (const_tree);
172 static bool arm_default_short_enums (void);
173 static bool arm_align_anon_bitfield (void);
174 static bool arm_return_in_msb (const_tree);
175 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
176 static bool arm_return_in_memory (const_tree, const_tree);
177 #ifdef TARGET_UNWIND_INFO
178 static void arm_unwind_emit (FILE *, rtx);
179 static bool arm_output_ttype (rtx);
181 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
183 static tree arm_cxx_guard_type (void);
184 static bool arm_cxx_guard_mask_bit (void);
185 static tree arm_get_cookie_size (tree);
186 static bool arm_cookie_has_size (void);
187 static bool arm_cxx_cdtor_returns_this (void);
188 static bool arm_cxx_key_method_may_be_inline (void);
189 static void arm_cxx_determine_class_data_visibility (tree);
190 static bool arm_cxx_class_data_always_comdat (void);
191 static bool arm_cxx_use_aeabi_atexit (void);
192 static void arm_init_libfuncs (void);
193 static tree arm_build_builtin_va_list (void);
194 static void arm_expand_builtin_va_start (tree, rtx);
195 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
196 static bool arm_handle_option (size_t, const char *, int);
197 static void arm_target_help (void);
198 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
199 static bool arm_cannot_copy_insn_p (rtx);
200 static bool arm_tls_symbol_p (rtx x);
201 static int arm_issue_rate (void);
202 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
203 static bool arm_allocate_stack_slots_for_args (void);
204 static const char *arm_invalid_parameter_type (const_tree t);
205 static const char *arm_invalid_return_type (const_tree t);
206 static tree arm_promoted_type (const_tree t);
207 static tree arm_convert_to_type (tree type, tree expr);
208 static bool arm_scalar_mode_supported_p (enum machine_mode);
211 /* Initialize the GCC target structure. */
212 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
213 #undef TARGET_MERGE_DECL_ATTRIBUTES
214 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
217 #undef TARGET_LEGITIMIZE_ADDRESS
218 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
220 #undef TARGET_ATTRIBUTE_TABLE
221 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
223 #undef TARGET_ASM_FILE_START
224 #define TARGET_ASM_FILE_START arm_file_start
225 #undef TARGET_ASM_FILE_END
226 #define TARGET_ASM_FILE_END arm_file_end
228 #undef TARGET_ASM_ALIGNED_SI_OP
229 #define TARGET_ASM_ALIGNED_SI_OP NULL
230 #undef TARGET_ASM_INTEGER
231 #define TARGET_ASM_INTEGER arm_assemble_integer
233 #undef TARGET_ASM_FUNCTION_PROLOGUE
234 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
236 #undef TARGET_ASM_FUNCTION_EPILOGUE
237 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
239 #undef TARGET_DEFAULT_TARGET_FLAGS
240 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
241 #undef TARGET_HANDLE_OPTION
242 #define TARGET_HANDLE_OPTION arm_handle_option
244 #define TARGET_HELP arm_target_help
246 #undef TARGET_COMP_TYPE_ATTRIBUTES
247 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
249 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
250 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
252 #undef TARGET_SCHED_ADJUST_COST
253 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
255 #undef TARGET_ENCODE_SECTION_INFO
257 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
259 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
262 #undef TARGET_STRIP_NAME_ENCODING
263 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
265 #undef TARGET_ASM_INTERNAL_LABEL
266 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
268 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
269 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
271 #undef TARGET_ASM_OUTPUT_MI_THUNK
272 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
273 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
274 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
276 #undef TARGET_RTX_COSTS
277 #define TARGET_RTX_COSTS arm_rtx_costs
278 #undef TARGET_ADDRESS_COST
279 #define TARGET_ADDRESS_COST arm_address_cost
281 #undef TARGET_SHIFT_TRUNCATION_MASK
282 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
283 #undef TARGET_VECTOR_MODE_SUPPORTED_P
284 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
286 #undef TARGET_MACHINE_DEPENDENT_REORG
287 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
289 #undef TARGET_INIT_BUILTINS
290 #define TARGET_INIT_BUILTINS arm_init_builtins
291 #undef TARGET_EXPAND_BUILTIN
292 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
294 #undef TARGET_INIT_LIBFUNCS
295 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
297 #undef TARGET_PROMOTE_FUNCTION_ARGS
298 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
299 #undef TARGET_PROMOTE_FUNCTION_RETURN
300 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
301 #undef TARGET_PROMOTE_PROTOTYPES
302 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
303 #undef TARGET_PASS_BY_REFERENCE
304 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
305 #undef TARGET_ARG_PARTIAL_BYTES
306 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
308 #undef TARGET_SETUP_INCOMING_VARARGS
309 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
311 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
312 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
314 #undef TARGET_DEFAULT_SHORT_ENUMS
315 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
317 #undef TARGET_ALIGN_ANON_BITFIELD
318 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
320 #undef TARGET_NARROW_VOLATILE_BITFIELD
321 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
323 #undef TARGET_CXX_GUARD_TYPE
324 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
326 #undef TARGET_CXX_GUARD_MASK_BIT
327 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
329 #undef TARGET_CXX_GET_COOKIE_SIZE
330 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
332 #undef TARGET_CXX_COOKIE_HAS_SIZE
333 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
335 #undef TARGET_CXX_CDTOR_RETURNS_THIS
336 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
338 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
339 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
341 #undef TARGET_CXX_USE_AEABI_ATEXIT
342 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
344 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
345 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
346 arm_cxx_determine_class_data_visibility
348 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
349 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
351 #undef TARGET_RETURN_IN_MSB
352 #define TARGET_RETURN_IN_MSB arm_return_in_msb
354 #undef TARGET_RETURN_IN_MEMORY
355 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
357 #undef TARGET_MUST_PASS_IN_STACK
358 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
360 #ifdef TARGET_UNWIND_INFO
361 #undef TARGET_UNWIND_EMIT
362 #define TARGET_UNWIND_EMIT arm_unwind_emit
364 /* EABI unwinding tables use a different format for the typeinfo tables. */
365 #undef TARGET_ASM_TTYPE
366 #define TARGET_ASM_TTYPE arm_output_ttype
368 #undef TARGET_ARM_EABI_UNWINDER
369 #define TARGET_ARM_EABI_UNWINDER true
370 #endif /* TARGET_UNWIND_INFO */
372 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
373 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
375 #undef TARGET_CANNOT_COPY_INSN_P
376 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
379 #undef TARGET_HAVE_TLS
380 #define TARGET_HAVE_TLS true
383 #undef TARGET_CANNOT_FORCE_CONST_MEM
384 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
386 #undef TARGET_MAX_ANCHOR_OFFSET
387 #define TARGET_MAX_ANCHOR_OFFSET 4095
389 /* The minimum is set such that the total size of the block
390 for a particular anchor is -4088 + 1 + 4095 bytes, which is
391 divisible by eight, ensuring natural spacing of anchors. */
392 #undef TARGET_MIN_ANCHOR_OFFSET
393 #define TARGET_MIN_ANCHOR_OFFSET -4088
395 #undef TARGET_SCHED_ISSUE_RATE
396 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
398 #undef TARGET_MANGLE_TYPE
399 #define TARGET_MANGLE_TYPE arm_mangle_type
401 #undef TARGET_BUILD_BUILTIN_VA_LIST
402 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
403 #undef TARGET_EXPAND_BUILTIN_VA_START
404 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
405 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
406 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
409 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
410 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
413 #undef TARGET_LEGITIMATE_ADDRESS_P
414 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
416 #undef TARGET_INVALID_PARAMETER_TYPE
417 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
419 #undef TARGET_INVALID_RETURN_TYPE
420 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
422 #undef TARGET_PROMOTED_TYPE
423 #define TARGET_PROMOTED_TYPE arm_promoted_type
425 #undef TARGET_CONVERT_TO_TYPE
426 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
428 #undef TARGET_SCALAR_MODE_SUPPORTED_P
429 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
431 struct gcc_target targetm = TARGET_INITIALIZER;
433 /* Obstack for minipool constant handling. */
434 static struct obstack minipool_obstack;
435 static char * minipool_startobj;
437 /* The maximum number of insns skipped which
438 will be conditionalised if possible. */
439 static int max_insns_skipped = 5;
441 extern FILE * asm_out_file;
443 /* True if we are currently building a constant table. */
444 int making_const_table;
446 /* The processor for which instructions should be scheduled. */
447 enum processor_type arm_tune = arm_none;
449 /* The default processor used if not overridden by commandline. */
450 static enum processor_type arm_default_cpu = arm_none;
452 /* Which floating point model to use. */
453 enum arm_fp_model arm_fp_model;
455 /* Which floating point hardware is available. */
456 enum fputype arm_fpu_arch;
458 /* Which floating point hardware to schedule for. */
459 enum fputype arm_fpu_tune;
461 /* Whether to use floating point hardware. */
462 enum float_abi_type arm_float_abi;
464 /* Which __fp16 format to use. */
465 enum arm_fp16_format_type arm_fp16_format;
467 /* Which ABI to use. */
468 enum arm_abi_type arm_abi;
470 /* Which thread pointer model to use. */
471 enum arm_tp_type target_thread_pointer = TP_AUTO;
473 /* Used to parse -mstructure_size_boundary command line option. */
474 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
476 /* Used for Thumb call_via trampolines. */
477 rtx thumb_call_via_label[14];
478 static int thumb_call_reg_needed;
480 /* Bit values used to identify processor capabilities. */
481 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
482 #define FL_ARCH3M (1 << 1) /* Extended multiply */
483 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
484 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
485 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
486 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
487 #define FL_THUMB (1 << 6) /* Thumb aware */
488 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
489 #define FL_STRONG (1 << 8) /* StrongARM */
490 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
491 #define FL_XSCALE (1 << 10) /* XScale */
492 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
493 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
494 media instructions. */
495 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
496 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
497 Note: ARM6 & 7 derivatives only. */
498 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
499 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
500 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
502 #define FL_DIV (1 << 18) /* Hardware divide. */
503 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
504 #define FL_NEON (1 << 20) /* Neon instructions. */
506 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
508 #define FL_FOR_ARCH2 FL_NOTM
509 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
510 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
511 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
512 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
513 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
514 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
515 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
516 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
517 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
518 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
519 #define FL_FOR_ARCH6J FL_FOR_ARCH6
520 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
521 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
522 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
523 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
524 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
525 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
526 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
527 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
528 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
530 /* The bits in this mask specify which
531 instructions we are allowed to generate. */
532 static unsigned long insn_flags = 0;
534 /* The bits in this mask specify which instruction scheduling options should
536 static unsigned long tune_flags = 0;
538 /* The following are used in the arm.md file as equivalents to bits
539 in the above two flag variables. */
541 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
544 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
547 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
550 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
553 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
556 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
559 /* Nonzero if this chip supports the ARM 6K extensions. */
562 /* Nonzero if instructions not present in the 'M' profile can be used. */
563 int arm_arch_notm = 0;
565 /* Nonzero if this chip can benefit from load scheduling. */
566 int arm_ld_sched = 0;
568 /* Nonzero if this chip is a StrongARM. */
569 int arm_tune_strongarm = 0;
571 /* Nonzero if this chip is a Cirrus variant. */
572 int arm_arch_cirrus = 0;
574 /* Nonzero if this chip supports Intel Wireless MMX technology. */
575 int arm_arch_iwmmxt = 0;
577 /* Nonzero if this chip is an XScale. */
578 int arm_arch_xscale = 0;
580 /* Nonzero if tuning for XScale */
581 int arm_tune_xscale = 0;
583 /* Nonzero if we want to tune for stores that access the write-buffer.
584 This typically means an ARM6 or ARM7 with MMU or MPU. */
585 int arm_tune_wbuf = 0;
587 /* Nonzero if tuning for Cortex-A9. */
588 int arm_tune_cortex_a9 = 0;
590 /* Nonzero if generating Thumb instructions. */
593 /* Nonzero if we should define __THUMB_INTERWORK__ in the
595 XXX This is a bit of a hack, it's intended to help work around
596 problems in GLD which doesn't understand that armv5t code is
597 interworking clean. */
598 int arm_cpp_interwork = 0;
600 /* Nonzero if chip supports Thumb 2. */
603 /* Nonzero if chip supports integer division instruction. */
606 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
607 must report the mode of the memory reference from PRINT_OPERAND to
608 PRINT_OPERAND_ADDRESS. */
609 enum machine_mode output_memory_reference_mode;
611 /* The register number to be used for the PIC offset register. */
612 unsigned arm_pic_register = INVALID_REGNUM;
614 /* Set to 1 after arm_reorg has started. Reset to start at the start of
615 the next function. */
616 static int after_arm_reorg = 0;
618 /* The maximum number of insns to be used when loading a constant. */
619 static int arm_constant_limit = 3;
621 /* For an explanation of these variables, see final_prescan_insn below. */
623 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
624 enum arm_cond_code arm_current_cc;
626 int arm_target_label;
627 /* The number of conditionally executed insns, including the current insn. */
628 int arm_condexec_count = 0;
629 /* A bitmask specifying the patterns for the IT block.
630 Zero means do not output an IT block before this insn. */
631 int arm_condexec_mask = 0;
632 /* The number of bits used in arm_condexec_mask. */
633 int arm_condexec_masklen = 0;
635 /* The condition codes of the ARM, and the inverse function. */
636 static const char * const arm_condition_codes[] =
638 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
639 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
642 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
643 #define streq(string1, string2) (strcmp (string1, string2) == 0)
645 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
646 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
647 | (1 << PIC_OFFSET_TABLE_REGNUM)))
649 /* Initialization code. */
653 const char *const name;
654 enum processor_type core;
656 const unsigned long flags;
657 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
660 /* Not all of these give usefully different compilation alternatives,
661 but there is no simple way of generalizing them. */
662 static const struct processors all_cores[] =
665 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
666 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
667 #include "arm-cores.def"
669 {NULL, arm_none, NULL, 0, NULL}
672 static const struct processors all_architectures[] =
674 /* ARM Architectures */
675 /* We don't specify rtx_costs here as it will be figured out
678 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
679 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
680 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
681 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
682 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
683 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
684 implementations that support it, so we will leave it out for now. */
685 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
686 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
687 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
688 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
689 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
690 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
691 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
692 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
693 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
694 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
695 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
696 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
697 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
698 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
699 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
700 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
701 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
702 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
703 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
704 {NULL, arm_none, NULL, 0 , NULL}
707 struct arm_cpu_select
711 const struct processors * processors;
714 /* This is a magic structure. The 'string' field is magically filled in
715 with a pointer to the value specified by the user on the command line
716 assuming that the user has specified such a value. */
718 static struct arm_cpu_select arm_select[] =
720 /* string name processors */
721 { NULL, "-mcpu=", all_cores },
722 { NULL, "-march=", all_architectures },
723 { NULL, "-mtune=", all_cores }
726 /* Defines representing the indexes into the above table. */
727 #define ARM_OPT_SET_CPU 0
728 #define ARM_OPT_SET_ARCH 1
729 #define ARM_OPT_SET_TUNE 2
731 /* The name of the preprocessor macro to define for this architecture. */
733 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
742 /* Available values for -mfpu=. */
744 static const struct fpu_desc all_fpus[] =
746 {"fpa", FPUTYPE_FPA},
747 {"fpe2", FPUTYPE_FPA_EMU2},
748 {"fpe3", FPUTYPE_FPA_EMU2},
749 {"maverick", FPUTYPE_MAVERICK},
750 {"vfp", FPUTYPE_VFP},
751 {"vfp3", FPUTYPE_VFP3},
752 {"vfpv3", FPUTYPE_VFP3},
753 {"vfpv3-d16", FPUTYPE_VFP3D16},
754 {"neon", FPUTYPE_NEON},
755 {"neon-fp16", FPUTYPE_NEON_FP16}
759 /* Floating point models used by the different hardware.
760 See fputype in arm.h. */
762 static const enum arm_fp_model fp_model_for_fpu[] =
764 /* No FP hardware. */
765 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
766 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
767 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
768 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
769 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
770 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
771 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
772 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
773 ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */
774 ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */
781 enum float_abi_type abi_type;
785 /* Available values for -mfloat-abi=. */
787 static const struct float_abi all_float_abis[] =
789 {"soft", ARM_FLOAT_ABI_SOFT},
790 {"softfp", ARM_FLOAT_ABI_SOFTFP},
791 {"hard", ARM_FLOAT_ABI_HARD}
798 enum arm_fp16_format_type fp16_format_type;
802 /* Available values for -mfp16-format=. */
804 static const struct fp16_format all_fp16_formats[] =
806 {"none", ARM_FP16_FORMAT_NONE},
807 {"ieee", ARM_FP16_FORMAT_IEEE},
808 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
815 enum arm_abi_type abi_type;
819 /* Available values for -mabi=. */
821 static const struct abi_name arm_all_abis[] =
823 {"apcs-gnu", ARM_ABI_APCS},
824 {"atpcs", ARM_ABI_ATPCS},
825 {"aapcs", ARM_ABI_AAPCS},
826 {"iwmmxt", ARM_ABI_IWMMXT},
827 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
830 /* Supported TLS relocations. */
840 /* Emit an insn that's a simple single-set. Both the operands must be known
843 emit_set_insn (rtx x, rtx y)
845 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
848 /* Return the number of bits set in VALUE. */
850 bit_count (unsigned long value)
852 unsigned long count = 0;
857 value &= value - 1; /* Clear the least-significant set bit. */
863 /* Set up library functions unique to ARM. */
866 arm_init_libfuncs (void)
868 /* There are no special library functions unless we are using the
873 /* The functions below are described in Section 4 of the "Run-Time
874 ABI for the ARM architecture", Version 1.0. */
876 /* Double-precision floating-point arithmetic. Table 2. */
877 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
878 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
879 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
880 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
881 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
883 /* Double-precision comparisons. Table 3. */
884 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
885 set_optab_libfunc (ne_optab, DFmode, NULL);
886 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
887 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
888 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
889 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
890 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
892 /* Single-precision floating-point arithmetic. Table 4. */
893 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
894 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
895 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
896 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
897 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
899 /* Single-precision comparisons. Table 5. */
900 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
901 set_optab_libfunc (ne_optab, SFmode, NULL);
902 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
903 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
904 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
905 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
906 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
908 /* Floating-point to integer conversions. Table 6. */
909 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
910 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
911 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
912 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
913 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
914 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
915 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
916 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
918 /* Conversions between floating types. Table 7. */
919 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
920 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
922 /* Integer to floating-point conversions. Table 8. */
923 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
924 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
925 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
926 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
927 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
928 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
929 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
930 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
932 /* Long long. Table 9. */
933 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
934 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
935 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
936 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
937 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
938 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
939 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
940 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
942 /* Integer (32/32->32) division. \S 4.3.1. */
943 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
944 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
946 /* The divmod functions are designed so that they can be used for
947 plain division, even though they return both the quotient and the
948 remainder. The quotient is returned in the usual location (i.e.,
949 r0 for SImode, {r0, r1} for DImode), just as would be expected
950 for an ordinary division routine. Because the AAPCS calling
951 conventions specify that all of { r0, r1, r2, r3 } are
952 callee-saved registers, there is no need to tell the compiler
953 explicitly that those registers are clobbered by these
955 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
956 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
958 /* For SImode division the ABI provides div-without-mod routines,
960 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
961 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
963 /* We don't have mod libcalls. Fortunately gcc knows how to use the
964 divmod libcalls instead. */
965 set_optab_libfunc (smod_optab, DImode, NULL);
966 set_optab_libfunc (umod_optab, DImode, NULL);
967 set_optab_libfunc (smod_optab, SImode, NULL);
968 set_optab_libfunc (umod_optab, SImode, NULL);
970 /* Half-precision float operations. The compiler handles all operations
971 with NULL libfuncs by converting the SFmode. */
972 switch (arm_fp16_format)
974 case ARM_FP16_FORMAT_IEEE:
975 case ARM_FP16_FORMAT_ALTERNATIVE:
978 set_conv_libfunc (trunc_optab, HFmode, SFmode,
979 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
981 : "__gnu_f2h_alternative"));
982 set_conv_libfunc (sext_optab, SFmode, HFmode,
983 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
985 : "__gnu_h2f_alternative"));
988 set_optab_libfunc (add_optab, HFmode, NULL);
989 set_optab_libfunc (sdiv_optab, HFmode, NULL);
990 set_optab_libfunc (smul_optab, HFmode, NULL);
991 set_optab_libfunc (neg_optab, HFmode, NULL);
992 set_optab_libfunc (sub_optab, HFmode, NULL);
995 set_optab_libfunc (eq_optab, HFmode, NULL);
996 set_optab_libfunc (ne_optab, HFmode, NULL);
997 set_optab_libfunc (lt_optab, HFmode, NULL);
998 set_optab_libfunc (le_optab, HFmode, NULL);
999 set_optab_libfunc (ge_optab, HFmode, NULL);
1000 set_optab_libfunc (gt_optab, HFmode, NULL);
1001 set_optab_libfunc (unord_optab, HFmode, NULL);
1009 /* On AAPCS systems, this is the "struct __va_list". */
1010 static GTY(()) tree va_list_type;
1012 /* Return the type to use as __builtin_va_list. */
1014 arm_build_builtin_va_list (void)
1019 if (!TARGET_AAPCS_BASED)
1020 return std_build_builtin_va_list ();
1022 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1030 The C Library ABI further reinforces this definition in \S
1033 We must follow this definition exactly. The structure tag
1034 name is visible in C++ mangled names, and thus forms a part
1035 of the ABI. The field name may be used by people who
1036 #include <stdarg.h>. */
1037 /* Create the type. */
1038 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1039 /* Give it the required name. */
1040 va_list_name = build_decl (BUILTINS_LOCATION,
1042 get_identifier ("__va_list"),
1044 DECL_ARTIFICIAL (va_list_name) = 1;
1045 TYPE_NAME (va_list_type) = va_list_name;
1046 /* Create the __ap field. */
1047 ap_field = build_decl (BUILTINS_LOCATION,
1049 get_identifier ("__ap"),
1051 DECL_ARTIFICIAL (ap_field) = 1;
1052 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1053 TYPE_FIELDS (va_list_type) = ap_field;
1054 /* Compute its layout. */
1055 layout_type (va_list_type);
1057 return va_list_type;
1060 /* Return an expression of type "void *" pointing to the next
1061 available argument in a variable-argument list. VALIST is the
1062 user-level va_list object, of type __builtin_va_list. */
1064 arm_extract_valist_ptr (tree valist)
1066 if (TREE_TYPE (valist) == error_mark_node)
1067 return error_mark_node;
1069 /* On an AAPCS target, the pointer is stored within "struct
1071 if (TARGET_AAPCS_BASED)
1073 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1074 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1075 valist, ap_field, NULL_TREE);
1081 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1083 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1085 valist = arm_extract_valist_ptr (valist);
1086 std_expand_builtin_va_start (valist, nextarg);
1089 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1091 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1094 valist = arm_extract_valist_ptr (valist);
1095 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1098 /* Implement TARGET_HANDLE_OPTION. */
1101 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1106 arm_select[1].string = arg;
1110 arm_select[0].string = arg;
1113 case OPT_mhard_float:
1114 target_float_abi_name = "hard";
1117 case OPT_msoft_float:
1118 target_float_abi_name = "soft";
1122 arm_select[2].string = arg;
1131 arm_target_help (void)
1134 static int columns = 0;
1137 /* If we have not done so already, obtain the desired maximum width of
1138 the output. Note - this is a duplication of the code at the start of
1139 gcc/opts.c:print_specific_help() - the two copies should probably be
1140 replaced by a single function. */
1145 GET_ENVIRONMENT (p, "COLUMNS");
1148 int value = atoi (p);
1155 /* Use a reasonable default. */
1159 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1161 /* The - 2 is because we know that the last entry in the array is NULL. */
1162 i = ARRAY_SIZE (all_cores) - 2;
1164 printf (" %s", all_cores[i].name);
1165 remaining = columns - (strlen (all_cores[i].name) + 4);
1166 gcc_assert (remaining >= 0);
1170 int len = strlen (all_cores[i].name);
1172 if (remaining > len + 2)
1174 printf (", %s", all_cores[i].name);
1175 remaining -= len + 2;
1181 printf ("\n %s", all_cores[i].name);
1182 remaining = columns - (len + 4);
1186 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1188 i = ARRAY_SIZE (all_architectures) - 2;
1191 printf (" %s", all_architectures[i].name);
1192 remaining = columns - (strlen (all_architectures[i].name) + 4);
1193 gcc_assert (remaining >= 0);
1197 int len = strlen (all_architectures[i].name);
1199 if (remaining > len + 2)
1201 printf (", %s", all_architectures[i].name);
1202 remaining -= len + 2;
1208 printf ("\n %s", all_architectures[i].name);
1209 remaining = columns - (len + 4);
1216 /* Fix up any incompatible options that the user has specified.
1217 This has now turned into a maze. */
1219 arm_override_options (void)
1222 enum processor_type target_arch_cpu = arm_none;
1223 enum processor_type selected_cpu = arm_none;
1225 /* Set up the flags based on the cpu/architecture selected by the user. */
1226 for (i = ARRAY_SIZE (arm_select); i--;)
1228 struct arm_cpu_select * ptr = arm_select + i;
1230 if (ptr->string != NULL && ptr->string[0] != '\0')
1232 const struct processors * sel;
1234 for (sel = ptr->processors; sel->name != NULL; sel++)
1235 if (streq (ptr->string, sel->name))
1237 /* Set the architecture define. */
1238 if (i != ARM_OPT_SET_TUNE)
1239 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1241 /* Determine the processor core for which we should
1242 tune code-generation. */
1243 if (/* -mcpu= is a sensible default. */
1244 i == ARM_OPT_SET_CPU
1245 /* -mtune= overrides -mcpu= and -march=. */
1246 || i == ARM_OPT_SET_TUNE)
1247 arm_tune = (enum processor_type) (sel - ptr->processors);
1249 /* Remember the CPU associated with this architecture.
1250 If no other option is used to set the CPU type,
1251 we'll use this to guess the most suitable tuning
1253 if (i == ARM_OPT_SET_ARCH)
1254 target_arch_cpu = sel->core;
1256 if (i == ARM_OPT_SET_CPU)
1257 selected_cpu = (enum processor_type) (sel - ptr->processors);
1259 if (i != ARM_OPT_SET_TUNE)
1261 /* If we have been given an architecture and a processor
1262 make sure that they are compatible. We only generate
1263 a warning though, and we prefer the CPU over the
1265 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1266 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1269 insn_flags = sel->flags;
1275 if (sel->name == NULL)
1276 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1280 /* Guess the tuning options from the architecture if necessary. */
1281 if (arm_tune == arm_none)
1282 arm_tune = target_arch_cpu;
1284 /* If the user did not specify a processor, choose one for them. */
1285 if (insn_flags == 0)
1287 const struct processors * sel;
1288 unsigned int sought;
1290 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1291 if (selected_cpu == arm_none)
1293 #ifdef SUBTARGET_CPU_DEFAULT
1294 /* Use the subtarget default CPU if none was specified by
1296 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1298 /* Default to ARM6. */
1299 if (selected_cpu == arm_none)
1300 selected_cpu = arm6;
1302 sel = &all_cores[selected_cpu];
1304 insn_flags = sel->flags;
1306 /* Now check to see if the user has specified some command line
1307 switch that require certain abilities from the cpu. */
1310 if (TARGET_INTERWORK || TARGET_THUMB)
1312 sought |= (FL_THUMB | FL_MODE32);
1314 /* There are no ARM processors that support both APCS-26 and
1315 interworking. Therefore we force FL_MODE26 to be removed
1316 from insn_flags here (if it was set), so that the search
1317 below will always be able to find a compatible processor. */
1318 insn_flags &= ~FL_MODE26;
1321 if (sought != 0 && ((sought & insn_flags) != sought))
1323 /* Try to locate a CPU type that supports all of the abilities
1324 of the default CPU, plus the extra abilities requested by
1326 for (sel = all_cores; sel->name != NULL; sel++)
1327 if ((sel->flags & sought) == (sought | insn_flags))
1330 if (sel->name == NULL)
1332 unsigned current_bit_count = 0;
1333 const struct processors * best_fit = NULL;
1335 /* Ideally we would like to issue an error message here
1336 saying that it was not possible to find a CPU compatible
1337 with the default CPU, but which also supports the command
1338 line options specified by the programmer, and so they
1339 ought to use the -mcpu=<name> command line option to
1340 override the default CPU type.
1342 If we cannot find a cpu that has both the
1343 characteristics of the default cpu and the given
1344 command line options we scan the array again looking
1345 for a best match. */
1346 for (sel = all_cores; sel->name != NULL; sel++)
1347 if ((sel->flags & sought) == sought)
1351 count = bit_count (sel->flags & insn_flags);
1353 if (count >= current_bit_count)
1356 current_bit_count = count;
1360 gcc_assert (best_fit);
1364 insn_flags = sel->flags;
1366 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1367 arm_default_cpu = (enum processor_type) (sel - all_cores);
1368 if (arm_tune == arm_none)
1369 arm_tune = arm_default_cpu;
1372 /* The processor for which we should tune should now have been
1374 gcc_assert (arm_tune != arm_none);
1376 tune_flags = all_cores[(int)arm_tune].flags;
1378 if (target_fp16_format_name)
1380 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1382 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1384 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1388 if (i == ARRAY_SIZE (all_fp16_formats))
1389 error ("invalid __fp16 format option: -mfp16-format=%s",
1390 target_fp16_format_name);
1393 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1395 if (target_abi_name)
1397 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1399 if (streq (arm_all_abis[i].name, target_abi_name))
1401 arm_abi = arm_all_abis[i].abi_type;
1405 if (i == ARRAY_SIZE (arm_all_abis))
1406 error ("invalid ABI option: -mabi=%s", target_abi_name);
1409 arm_abi = ARM_DEFAULT_ABI;
1411 /* Make sure that the processor choice does not conflict with any of the
1412 other command line choices. */
1413 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1414 error ("target CPU does not support ARM mode");
1416 /* BPABI targets use linker tricks to allow interworking on cores
1417 without thumb support. */
1418 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1420 warning (0, "target CPU does not support interworking" );
1421 target_flags &= ~MASK_INTERWORK;
1424 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1426 warning (0, "target CPU does not support THUMB instructions");
1427 target_flags &= ~MASK_THUMB;
1430 if (TARGET_APCS_FRAME && TARGET_THUMB)
1432 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1433 target_flags &= ~MASK_APCS_FRAME;
1436 /* Callee super interworking implies thumb interworking. Adding
1437 this to the flags here simplifies the logic elsewhere. */
1438 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1439 target_flags |= MASK_INTERWORK;
1441 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1442 from here where no function is being compiled currently. */
1443 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1444 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1446 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1447 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1449 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1450 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1452 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1454 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1455 target_flags |= MASK_APCS_FRAME;
1458 if (TARGET_POKE_FUNCTION_NAME)
1459 target_flags |= MASK_APCS_FRAME;
1461 if (TARGET_APCS_REENT && flag_pic)
1462 error ("-fpic and -mapcs-reent are incompatible");
1464 if (TARGET_APCS_REENT)
1465 warning (0, "APCS reentrant code not supported. Ignored");
1467 /* If this target is normally configured to use APCS frames, warn if they
1468 are turned off and debugging is turned on. */
1470 && write_symbols != NO_DEBUG
1471 && !TARGET_APCS_FRAME
1472 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1473 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1475 if (TARGET_APCS_FLOAT)
1476 warning (0, "passing floating point arguments in fp regs not yet supported");
1478 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1479 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1480 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1481 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1482 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1483 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1484 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1485 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1486 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1487 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1488 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1489 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1491 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1492 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1493 thumb_code = (TARGET_ARM == 0);
1494 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1495 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1496 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1497 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1498 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1500 /* If we are not using the default (ARM mode) section anchor offset
1501 ranges, then set the correct ranges now. */
1504 /* Thumb-1 LDR instructions cannot have negative offsets.
1505 Permissible positive offset ranges are 5-bit (for byte loads),
1506 6-bit (for halfword loads), or 7-bit (for word loads).
1507 Empirical results suggest a 7-bit anchor range gives the best
1508 overall code size. */
1509 targetm.min_anchor_offset = 0;
1510 targetm.max_anchor_offset = 127;
1512 else if (TARGET_THUMB2)
1514 /* The minimum is set such that the total size of the block
1515 for a particular anchor is 248 + 1 + 4095 bytes, which is
1516 divisible by eight, ensuring natural spacing of anchors. */
1517 targetm.min_anchor_offset = -248;
1518 targetm.max_anchor_offset = 4095;
1521 /* V5 code we generate is completely interworking capable, so we turn off
1522 TARGET_INTERWORK here to avoid many tests later on. */
1524 /* XXX However, we must pass the right pre-processor defines to CPP
1525 or GLD can get confused. This is a hack. */
1526 if (TARGET_INTERWORK)
1527 arm_cpp_interwork = 1;
1530 target_flags &= ~MASK_INTERWORK;
1532 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1533 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1535 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1536 error ("iwmmxt abi requires an iwmmxt capable cpu");
1538 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1539 if (target_fpu_name == NULL && target_fpe_name != NULL)
1541 if (streq (target_fpe_name, "2"))
1542 target_fpu_name = "fpe2";
1543 else if (streq (target_fpe_name, "3"))
1544 target_fpu_name = "fpe3";
1546 error ("invalid floating point emulation option: -mfpe=%s",
1549 if (target_fpu_name != NULL)
1551 /* The user specified a FPU. */
1552 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1554 if (streq (all_fpus[i].name, target_fpu_name))
1556 arm_fpu_arch = all_fpus[i].fpu;
1557 arm_fpu_tune = arm_fpu_arch;
1558 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1562 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1563 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1567 #ifdef FPUTYPE_DEFAULT
1568 /* Use the default if it is specified for this platform. */
1569 arm_fpu_arch = FPUTYPE_DEFAULT;
1570 arm_fpu_tune = FPUTYPE_DEFAULT;
1572 /* Pick one based on CPU type. */
1573 /* ??? Some targets assume FPA is the default.
1574 if ((insn_flags & FL_VFP) != 0)
1575 arm_fpu_arch = FPUTYPE_VFP;
1578 if (arm_arch_cirrus)
1579 arm_fpu_arch = FPUTYPE_MAVERICK;
1581 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1583 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1584 arm_fpu_tune = FPUTYPE_FPA;
1586 arm_fpu_tune = arm_fpu_arch;
1587 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1588 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1591 if (target_float_abi_name != NULL)
1593 /* The user specified a FP ABI. */
1594 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1596 if (streq (all_float_abis[i].name, target_float_abi_name))
1598 arm_float_abi = all_float_abis[i].abi_type;
1602 if (i == ARRAY_SIZE (all_float_abis))
1603 error ("invalid floating point abi: -mfloat-abi=%s",
1604 target_float_abi_name);
1607 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1609 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1610 sorry ("-mfloat-abi=hard and VFP");
1612 if (TARGET_AAPCS_BASED
1613 && (arm_fp_model == ARM_FP_MODEL_FPA))
1614 error ("FPA is unsupported in the AAPCS");
1616 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1617 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1618 will ever exist. GCC makes no attempt to support this combination. */
1619 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1620 sorry ("iWMMXt and hardware floating point");
1622 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1623 if (TARGET_THUMB2 && TARGET_IWMMXT)
1624 sorry ("Thumb-2 iWMMXt");
1626 /* __fp16 support currently assumes the core has ldrh. */
1627 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1628 sorry ("__fp16 and no ldrh");
1630 /* If soft-float is specified then don't use FPU. */
1631 if (TARGET_SOFT_FLOAT)
1632 arm_fpu_arch = FPUTYPE_NONE;
1634 /* For arm2/3 there is no need to do any scheduling if there is only
1635 a floating point emulator, or we are doing software floating-point. */
1636 if ((TARGET_SOFT_FLOAT
1637 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1638 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1639 && (tune_flags & FL_MODE32) == 0)
1640 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1642 if (target_thread_switch)
1644 if (strcmp (target_thread_switch, "soft") == 0)
1645 target_thread_pointer = TP_SOFT;
1646 else if (strcmp (target_thread_switch, "auto") == 0)
1647 target_thread_pointer = TP_AUTO;
1648 else if (strcmp (target_thread_switch, "cp15") == 0)
1649 target_thread_pointer = TP_CP15;
1651 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1654 /* Use the cp15 method if it is available. */
1655 if (target_thread_pointer == TP_AUTO)
1657 if (arm_arch6k && !TARGET_THUMB)
1658 target_thread_pointer = TP_CP15;
1660 target_thread_pointer = TP_SOFT;
1663 if (TARGET_HARD_TP && TARGET_THUMB1)
1664 error ("can not use -mtp=cp15 with 16-bit Thumb");
1666 /* Override the default structure alignment for AAPCS ABI. */
1667 if (TARGET_AAPCS_BASED)
1668 arm_structure_size_boundary = 8;
1670 if (structure_size_string != NULL)
1672 int size = strtol (structure_size_string, NULL, 0);
1674 if (size == 8 || size == 32
1675 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1676 arm_structure_size_boundary = size;
1678 warning (0, "structure size boundary can only be set to %s",
1679 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1682 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1684 error ("RTP PIC is incompatible with Thumb");
1688 /* If stack checking is disabled, we can use r10 as the PIC register,
1689 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1690 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1692 if (TARGET_VXWORKS_RTP)
1693 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1694 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1697 if (flag_pic && TARGET_VXWORKS_RTP)
1698 arm_pic_register = 9;
1700 if (arm_pic_register_string != NULL)
1702 int pic_register = decode_reg_name (arm_pic_register_string);
1705 warning (0, "-mpic-register= is useless without -fpic");
1707 /* Prevent the user from choosing an obviously stupid PIC register. */
1708 else if (pic_register < 0 || call_used_regs[pic_register]
1709 || pic_register == HARD_FRAME_POINTER_REGNUM
1710 || pic_register == STACK_POINTER_REGNUM
1711 || pic_register >= PC_REGNUM
1712 || (TARGET_VXWORKS_RTP
1713 && (unsigned int) pic_register != arm_pic_register))
1714 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1716 arm_pic_register = pic_register;
1719 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1720 if (fix_cm3_ldrd == 2)
1722 if (selected_cpu == cortexm3)
1728 /* ??? We might want scheduling for thumb2. */
1729 if (TARGET_THUMB && flag_schedule_insns)
1731 /* Don't warn since it's on by default in -O2. */
1732 flag_schedule_insns = 0;
1737 arm_constant_limit = 1;
1739 /* If optimizing for size, bump the number of instructions that we
1740 are prepared to conditionally execute (even on a StrongARM). */
1741 max_insns_skipped = 6;
1745 /* For processors with load scheduling, it never costs more than
1746 2 cycles to load a constant, and the load scheduler may well
1747 reduce that to 1. */
1749 arm_constant_limit = 1;
1751 /* On XScale the longer latency of a load makes it more difficult
1752 to achieve a good schedule, so it's faster to synthesize
1753 constants that can be done in two insns. */
1754 if (arm_tune_xscale)
1755 arm_constant_limit = 2;
1757 /* StrongARM has early execution of branches, so a sequence
1758 that is worth skipping is shorter. */
1759 if (arm_tune_strongarm)
1760 max_insns_skipped = 3;
1763 /* Register global variables with the garbage collector. */
1764 arm_add_gc_roots ();
1768 arm_add_gc_roots (void)
1770 gcc_obstack_init(&minipool_obstack);
1771 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1774 /* A table of known ARM exception types.
1775 For use with the interrupt function attribute. */
1779 const char *const arg;
1780 const unsigned long return_value;
1784 static const isr_attribute_arg isr_attribute_args [] =
1786 { "IRQ", ARM_FT_ISR },
1787 { "irq", ARM_FT_ISR },
1788 { "FIQ", ARM_FT_FIQ },
1789 { "fiq", ARM_FT_FIQ },
1790 { "ABORT", ARM_FT_ISR },
1791 { "abort", ARM_FT_ISR },
1792 { "ABORT", ARM_FT_ISR },
1793 { "abort", ARM_FT_ISR },
1794 { "UNDEF", ARM_FT_EXCEPTION },
1795 { "undef", ARM_FT_EXCEPTION },
1796 { "SWI", ARM_FT_EXCEPTION },
1797 { "swi", ARM_FT_EXCEPTION },
1798 { NULL, ARM_FT_NORMAL }
1801 /* Returns the (interrupt) function type of the current
1802 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1804 static unsigned long
1805 arm_isr_value (tree argument)
1807 const isr_attribute_arg * ptr;
1811 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1813 /* No argument - default to IRQ. */
1814 if (argument == NULL_TREE)
1817 /* Get the value of the argument. */
1818 if (TREE_VALUE (argument) == NULL_TREE
1819 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1820 return ARM_FT_UNKNOWN;
1822 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1824 /* Check it against the list of known arguments. */
1825 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1826 if (streq (arg, ptr->arg))
1827 return ptr->return_value;
1829 /* An unrecognized interrupt type. */
1830 return ARM_FT_UNKNOWN;
1833 /* Computes the type of the current function. */
1835 static unsigned long
1836 arm_compute_func_type (void)
1838 unsigned long type = ARM_FT_UNKNOWN;
1842 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1844 /* Decide if the current function is volatile. Such functions
1845 never return, and many memory cycles can be saved by not storing
1846 register values that will never be needed again. This optimization
1847 was added to speed up context switching in a kernel application. */
1849 && (TREE_NOTHROW (current_function_decl)
1850 || !(flag_unwind_tables
1851 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1852 && TREE_THIS_VOLATILE (current_function_decl))
1853 type |= ARM_FT_VOLATILE;
1855 if (cfun->static_chain_decl != NULL)
1856 type |= ARM_FT_NESTED;
1858 attr = DECL_ATTRIBUTES (current_function_decl);
1860 a = lookup_attribute ("naked", attr);
1862 type |= ARM_FT_NAKED;
1864 a = lookup_attribute ("isr", attr);
1866 a = lookup_attribute ("interrupt", attr);
1869 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1871 type |= arm_isr_value (TREE_VALUE (a));
1876 /* Returns the type of the current function. */
1879 arm_current_func_type (void)
1881 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1882 cfun->machine->func_type = arm_compute_func_type ();
1884 return cfun->machine->func_type;
1888 arm_allocate_stack_slots_for_args (void)
1890 /* Naked functions should not allocate stack slots for arguments. */
1891 return !IS_NAKED (arm_current_func_type ());
1895 /* Return 1 if it is possible to return using a single instruction.
1896 If SIBLING is non-null, this is a test for a return before a sibling
1897 call. SIBLING is the call insn, so we can examine its register usage. */
1900 use_return_insn (int iscond, rtx sibling)
1903 unsigned int func_type;
1904 unsigned long saved_int_regs;
1905 unsigned HOST_WIDE_INT stack_adjust;
1906 arm_stack_offsets *offsets;
1908 /* Never use a return instruction before reload has run. */
1909 if (!reload_completed)
1912 func_type = arm_current_func_type ();
1914 /* Naked, volatile and stack alignment functions need special
1916 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1919 /* So do interrupt functions that use the frame pointer and Thumb
1920 interrupt functions. */
1921 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1924 offsets = arm_get_frame_offsets ();
1925 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1927 /* As do variadic functions. */
1928 if (crtl->args.pretend_args_size
1929 || cfun->machine->uses_anonymous_args
1930 /* Or if the function calls __builtin_eh_return () */
1931 || crtl->calls_eh_return
1932 /* Or if the function calls alloca */
1933 || cfun->calls_alloca
1934 /* Or if there is a stack adjustment. However, if the stack pointer
1935 is saved on the stack, we can use a pre-incrementing stack load. */
1936 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1937 && stack_adjust == 4)))
1940 saved_int_regs = offsets->saved_regs_mask;
1942 /* Unfortunately, the insn
1944 ldmib sp, {..., sp, ...}
1946 triggers a bug on most SA-110 based devices, such that the stack
1947 pointer won't be correctly restored if the instruction takes a
1948 page fault. We work around this problem by popping r3 along with
1949 the other registers, since that is never slower than executing
1950 another instruction.
1952 We test for !arm_arch5 here, because code for any architecture
1953 less than this could potentially be run on one of the buggy
1955 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1957 /* Validate that r3 is a call-clobbered register (always true in
1958 the default abi) ... */
1959 if (!call_used_regs[3])
1962 /* ... that it isn't being used for a return value ... */
1963 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1966 /* ... or for a tail-call argument ... */
1969 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1971 if (find_regno_fusage (sibling, USE, 3))
1975 /* ... and that there are no call-saved registers in r0-r2
1976 (always true in the default ABI). */
1977 if (saved_int_regs & 0x7)
1981 /* Can't be done if interworking with Thumb, and any registers have been
1983 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1986 /* On StrongARM, conditional returns are expensive if they aren't
1987 taken and multiple registers have been stacked. */
1988 if (iscond && arm_tune_strongarm)
1990 /* Conditional return when just the LR is stored is a simple
1991 conditional-load instruction, that's not expensive. */
1992 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1996 && arm_pic_register != INVALID_REGNUM
1997 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2001 /* If there are saved registers but the LR isn't saved, then we need
2002 two instructions for the return. */
2003 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2006 /* Can't be done if any of the FPA regs are pushed,
2007 since this also requires an insn. */
2008 if (TARGET_HARD_FLOAT && TARGET_FPA)
2009 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2010 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2013 /* Likewise VFP regs. */
2014 if (TARGET_HARD_FLOAT && TARGET_VFP)
2015 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2016 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2019 if (TARGET_REALLY_IWMMXT)
2020 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2021 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2027 /* Return TRUE if int I is a valid immediate ARM constant. */
2030 const_ok_for_arm (HOST_WIDE_INT i)
2034 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2035 be all zero, or all one. */
2036 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2037 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2038 != ((~(unsigned HOST_WIDE_INT) 0)
2039 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2042 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2044 /* Fast return for 0 and small values. We must do this for zero, since
2045 the code below can't handle that one case. */
2046 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2049 /* Get the number of trailing zeros. */
2050 lowbit = ffs((int) i) - 1;
2052 /* Only even shifts are allowed in ARM mode so round down to the
2053 nearest even number. */
2057 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2062 /* Allow rotated constants in ARM mode. */
2064 && ((i & ~0xc000003f) == 0
2065 || (i & ~0xf000000f) == 0
2066 || (i & ~0xfc000003) == 0))
2073 /* Allow repeated pattern. */
2076 if (i == v || i == (v | (v << 8)))
2083 /* Return true if I is a valid constant for the operation CODE. */
2085 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2087 if (const_ok_for_arm (i))
2111 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2113 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2119 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2123 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2130 /* Emit a sequence of insns to handle a large constant.
2131 CODE is the code of the operation required, it can be any of SET, PLUS,
2132 IOR, AND, XOR, MINUS;
2133 MODE is the mode in which the operation is being performed;
2134 VAL is the integer to operate on;
2135 SOURCE is the other operand (a register, or a null-pointer for SET);
2136 SUBTARGETS means it is safe to create scratch registers if that will
2137 either produce a simpler sequence, or we will want to cse the values.
2138 Return value is the number of insns emitted. */
2140 /* ??? Tweak this for thumb2. */
2142 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2143 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2147 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2148 cond = COND_EXEC_TEST (PATTERN (insn));
2152 if (subtargets || code == SET
2153 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2154 && REGNO (target) != REGNO (source)))
2156 /* After arm_reorg has been called, we can't fix up expensive
2157 constants by pushing them into memory so we must synthesize
2158 them in-line, regardless of the cost. This is only likely to
2159 be more costly on chips that have load delay slots and we are
2160 compiling without running the scheduler (so no splitting
2161 occurred before the final instruction emission).
2163 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2165 if (!after_arm_reorg
2167 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2169 > arm_constant_limit + (code != SET)))
2173 /* Currently SET is the only monadic value for CODE, all
2174 the rest are diadic. */
2175 if (TARGET_USE_MOVT)
2176 arm_emit_movpair (target, GEN_INT (val));
2178 emit_set_insn (target, GEN_INT (val));
2184 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2186 if (TARGET_USE_MOVT)
2187 arm_emit_movpair (temp, GEN_INT (val));
2189 emit_set_insn (temp, GEN_INT (val));
2191 /* For MINUS, the value is subtracted from, since we never
2192 have subtraction of a constant. */
2194 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2196 emit_set_insn (target,
2197 gen_rtx_fmt_ee (code, mode, source, temp));
2203 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2207 /* Return the number of ARM instructions required to synthesize the given
2210 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2212 HOST_WIDE_INT temp1;
2220 if (remainder & (3 << (i - 2)))
2225 temp1 = remainder & ((0x0ff << end)
2226 | ((i < end) ? (0xff >> (32 - end)) : 0));
2227 remainder &= ~temp1;
2232 } while (remainder);
2236 /* Emit an instruction with the indicated PATTERN. If COND is
2237 non-NULL, conditionalize the execution of the instruction on COND
2241 emit_constant_insn (rtx cond, rtx pattern)
2244 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2245 emit_insn (pattern);
2248 /* As above, but extra parameter GENERATE which, if clear, suppresses
2250 /* ??? This needs more work for thumb2. */
2253 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2254 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2259 int can_negate_initial = 0;
2262 int num_bits_set = 0;
2263 int set_sign_bit_copies = 0;
2264 int clear_sign_bit_copies = 0;
2265 int clear_zero_bit_copies = 0;
2266 int set_zero_bit_copies = 0;
2268 unsigned HOST_WIDE_INT temp1, temp2;
2269 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2271 /* Find out which operations are safe for a given CODE. Also do a quick
2272 check for degenerate cases; these can occur when DImode operations
2284 can_negate_initial = 1;
2288 if (remainder == 0xffffffff)
2291 emit_constant_insn (cond,
2292 gen_rtx_SET (VOIDmode, target,
2293 GEN_INT (ARM_SIGN_EXTEND (val))));
2299 if (reload_completed && rtx_equal_p (target, source))
2303 emit_constant_insn (cond,
2304 gen_rtx_SET (VOIDmode, target, source));
2316 emit_constant_insn (cond,
2317 gen_rtx_SET (VOIDmode, target, const0_rtx));
2320 if (remainder == 0xffffffff)
2322 if (reload_completed && rtx_equal_p (target, source))
2325 emit_constant_insn (cond,
2326 gen_rtx_SET (VOIDmode, target, source));
2335 if (reload_completed && rtx_equal_p (target, source))
2338 emit_constant_insn (cond,
2339 gen_rtx_SET (VOIDmode, target, source));
2343 /* We don't know how to handle other cases yet. */
2344 gcc_assert (remainder == 0xffffffff);
2347 emit_constant_insn (cond,
2348 gen_rtx_SET (VOIDmode, target,
2349 gen_rtx_NOT (mode, source)));
2353 /* We treat MINUS as (val - source), since (source - val) is always
2354 passed as (source + (-val)). */
2358 emit_constant_insn (cond,
2359 gen_rtx_SET (VOIDmode, target,
2360 gen_rtx_NEG (mode, source)));
2363 if (const_ok_for_arm (val))
2366 emit_constant_insn (cond,
2367 gen_rtx_SET (VOIDmode, target,
2368 gen_rtx_MINUS (mode, GEN_INT (val),
2380 /* If we can do it in one insn get out quickly. */
2381 if (const_ok_for_arm (val)
2382 || (can_negate_initial && const_ok_for_arm (-val))
2383 || (can_invert && const_ok_for_arm (~val)))
2386 emit_constant_insn (cond,
2387 gen_rtx_SET (VOIDmode, target,
2389 ? gen_rtx_fmt_ee (code, mode, source,
2395 /* Calculate a few attributes that may be useful for specific
2397 /* Count number of leading zeros. */
2398 for (i = 31; i >= 0; i--)
2400 if ((remainder & (1 << i)) == 0)
2401 clear_sign_bit_copies++;
2406 /* Count number of leading 1's. */
2407 for (i = 31; i >= 0; i--)
2409 if ((remainder & (1 << i)) != 0)
2410 set_sign_bit_copies++;
2415 /* Count number of trailing zero's. */
2416 for (i = 0; i <= 31; i++)
2418 if ((remainder & (1 << i)) == 0)
2419 clear_zero_bit_copies++;
2424 /* Count number of trailing 1's. */
2425 for (i = 0; i <= 31; i++)
2427 if ((remainder & (1 << i)) != 0)
2428 set_zero_bit_copies++;
2436 /* See if we can use movw. */
2437 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2440 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2445 /* See if we can do this by sign_extending a constant that is known
2446 to be negative. This is a good, way of doing it, since the shift
2447 may well merge into a subsequent insn. */
2448 if (set_sign_bit_copies > 1)
2450 if (const_ok_for_arm
2451 (temp1 = ARM_SIGN_EXTEND (remainder
2452 << (set_sign_bit_copies - 1))))
2456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2457 emit_constant_insn (cond,
2458 gen_rtx_SET (VOIDmode, new_src,
2460 emit_constant_insn (cond,
2461 gen_ashrsi3 (target, new_src,
2462 GEN_INT (set_sign_bit_copies - 1)));
2466 /* For an inverted constant, we will need to set the low bits,
2467 these will be shifted out of harm's way. */
2468 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2469 if (const_ok_for_arm (~temp1))
2473 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2474 emit_constant_insn (cond,
2475 gen_rtx_SET (VOIDmode, new_src,
2477 emit_constant_insn (cond,
2478 gen_ashrsi3 (target, new_src,
2479 GEN_INT (set_sign_bit_copies - 1)));
2485 /* See if we can calculate the value as the difference between two
2486 valid immediates. */
2487 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2489 int topshift = clear_sign_bit_copies & ~1;
2491 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2492 & (0xff000000 >> topshift));
2494 /* If temp1 is zero, then that means the 9 most significant
2495 bits of remainder were 1 and we've caused it to overflow.
2496 When topshift is 0 we don't need to do anything since we
2497 can borrow from 'bit 32'. */
2498 if (temp1 == 0 && topshift != 0)
2499 temp1 = 0x80000000 >> (topshift - 1);
2501 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2503 if (const_ok_for_arm (temp2))
2507 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2508 emit_constant_insn (cond,
2509 gen_rtx_SET (VOIDmode, new_src,
2511 emit_constant_insn (cond,
2512 gen_addsi3 (target, new_src,
2520 /* See if we can generate this by setting the bottom (or the top)
2521 16 bits, and then shifting these into the other half of the
2522 word. We only look for the simplest cases, to do more would cost
2523 too much. Be careful, however, not to generate this when the
2524 alternative would take fewer insns. */
2525 if (val & 0xffff0000)
2527 temp1 = remainder & 0xffff0000;
2528 temp2 = remainder & 0x0000ffff;
2530 /* Overlaps outside this range are best done using other methods. */
2531 for (i = 9; i < 24; i++)
2533 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2534 && !const_ok_for_arm (temp2))
2536 rtx new_src = (subtargets
2537 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2539 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2540 source, subtargets, generate);
2548 gen_rtx_ASHIFT (mode, source,
2555 /* Don't duplicate cases already considered. */
2556 for (i = 17; i < 24; i++)
2558 if (((temp1 | (temp1 >> i)) == remainder)
2559 && !const_ok_for_arm (temp1))
2561 rtx new_src = (subtargets
2562 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2564 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2565 source, subtargets, generate);
2570 gen_rtx_SET (VOIDmode, target,
2573 gen_rtx_LSHIFTRT (mode, source,
2584 /* If we have IOR or XOR, and the constant can be loaded in a
2585 single instruction, and we can find a temporary to put it in,
2586 then this can be done in two instructions instead of 3-4. */
2588 /* TARGET can't be NULL if SUBTARGETS is 0 */
2589 || (reload_completed && !reg_mentioned_p (target, source)))
2591 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2595 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2597 emit_constant_insn (cond,
2598 gen_rtx_SET (VOIDmode, sub,
2600 emit_constant_insn (cond,
2601 gen_rtx_SET (VOIDmode, target,
2602 gen_rtx_fmt_ee (code, mode,
2613 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2614 and the remainder 0s for e.g. 0xfff00000)
2615 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2617 This can be done in 2 instructions by using shifts with mov or mvn.
2622 mvn r0, r0, lsr #12 */
2623 if (set_sign_bit_copies > 8
2624 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2628 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2629 rtx shift = GEN_INT (set_sign_bit_copies);
2633 gen_rtx_SET (VOIDmode, sub,
2635 gen_rtx_ASHIFT (mode,
2640 gen_rtx_SET (VOIDmode, target,
2642 gen_rtx_LSHIFTRT (mode, sub,
2649 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2651 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2653 For eg. r0 = r0 | 0xfff
2658 if (set_zero_bit_copies > 8
2659 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2663 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2664 rtx shift = GEN_INT (set_zero_bit_copies);
2668 gen_rtx_SET (VOIDmode, sub,
2670 gen_rtx_LSHIFTRT (mode,
2675 gen_rtx_SET (VOIDmode, target,
2677 gen_rtx_ASHIFT (mode, sub,
2683 /* This will never be reached for Thumb2 because orn is a valid
2684 instruction. This is for Thumb1 and the ARM 32 bit cases.
2686 x = y | constant (such that ~constant is a valid constant)
2688 x = ~(~y & ~constant).
2690 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2694 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2695 emit_constant_insn (cond,
2696 gen_rtx_SET (VOIDmode, sub,
2697 gen_rtx_NOT (mode, source)));
2700 sub = gen_reg_rtx (mode);
2701 emit_constant_insn (cond,
2702 gen_rtx_SET (VOIDmode, sub,
2703 gen_rtx_AND (mode, source,
2705 emit_constant_insn (cond,
2706 gen_rtx_SET (VOIDmode, target,
2707 gen_rtx_NOT (mode, sub)));
2714 /* See if two shifts will do 2 or more insn's worth of work. */
2715 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2717 HOST_WIDE_INT shift_mask = ((0xffffffff
2718 << (32 - clear_sign_bit_copies))
2721 if ((remainder | shift_mask) != 0xffffffff)
2725 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2726 insns = arm_gen_constant (AND, mode, cond,
2727 remainder | shift_mask,
2728 new_src, source, subtargets, 1);
2733 rtx targ = subtargets ? NULL_RTX : target;
2734 insns = arm_gen_constant (AND, mode, cond,
2735 remainder | shift_mask,
2736 targ, source, subtargets, 0);
2742 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2743 rtx shift = GEN_INT (clear_sign_bit_copies);
2745 emit_insn (gen_ashlsi3 (new_src, source, shift));
2746 emit_insn (gen_lshrsi3 (target, new_src, shift));
2752 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2754 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2756 if ((remainder | shift_mask) != 0xffffffff)
2760 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2762 insns = arm_gen_constant (AND, mode, cond,
2763 remainder | shift_mask,
2764 new_src, source, subtargets, 1);
2769 rtx targ = subtargets ? NULL_RTX : target;
2771 insns = arm_gen_constant (AND, mode, cond,
2772 remainder | shift_mask,
2773 targ, source, subtargets, 0);
2779 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2780 rtx shift = GEN_INT (clear_zero_bit_copies);
2782 emit_insn (gen_lshrsi3 (new_src, source, shift));
2783 emit_insn (gen_ashlsi3 (target, new_src, shift));
2795 for (i = 0; i < 32; i++)
2796 if (remainder & (1 << i))
2800 || (code != IOR && can_invert && num_bits_set > 16))
2801 remainder = (~remainder) & 0xffffffff;
2802 else if (code == PLUS && num_bits_set > 16)
2803 remainder = (-remainder) & 0xffffffff;
2810 /* Now try and find a way of doing the job in either two or three
2812 We start by looking for the largest block of zeros that are aligned on
2813 a 2-bit boundary, we then fill up the temps, wrapping around to the
2814 top of the word when we drop off the bottom.
2815 In the worst case this code should produce no more than four insns.
2816 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2817 best place to start. */
2819 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2825 int best_consecutive_zeros = 0;
2827 for (i = 0; i < 32; i += 2)
2829 int consecutive_zeros = 0;
2831 if (!(remainder & (3 << i)))
2833 while ((i < 32) && !(remainder & (3 << i)))
2835 consecutive_zeros += 2;
2838 if (consecutive_zeros > best_consecutive_zeros)
2840 best_consecutive_zeros = consecutive_zeros;
2841 best_start = i - consecutive_zeros;
2847 /* So long as it won't require any more insns to do so, it's
2848 desirable to emit a small constant (in bits 0...9) in the last
2849 insn. This way there is more chance that it can be combined with
2850 a later addressing insn to form a pre-indexed load or store
2851 operation. Consider:
2853 *((volatile int *)0xe0000100) = 1;
2854 *((volatile int *)0xe0000110) = 2;
2856 We want this to wind up as:
2860 str rB, [rA, #0x100]
2862 str rB, [rA, #0x110]
2864 rather than having to synthesize both large constants from scratch.
2866 Therefore, we calculate how many insns would be required to emit
2867 the constant starting from `best_start', and also starting from
2868 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2869 yield a shorter sequence, we may as well use zero. */
2871 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2872 && (count_insns_for_constant (remainder, 0) <=
2873 count_insns_for_constant (remainder, best_start)))
2877 /* Now start emitting the insns. */
2885 if (remainder & (3 << (i - 2)))
2890 temp1 = remainder & ((0x0ff << end)
2891 | ((i < end) ? (0xff >> (32 - end)) : 0));
2892 remainder &= ~temp1;
2896 rtx new_src, temp1_rtx;
2898 if (code == SET || code == MINUS)
2900 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2901 if (can_invert && code != MINUS)
2906 if (remainder && subtargets)
2907 new_src = gen_reg_rtx (mode);
2912 else if (can_negate)
2916 temp1 = trunc_int_for_mode (temp1, mode);
2917 temp1_rtx = GEN_INT (temp1);
2921 else if (code == MINUS)
2922 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2924 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2926 emit_constant_insn (cond,
2927 gen_rtx_SET (VOIDmode, new_src,
2937 else if (code == MINUS)
2946 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2959 /* Canonicalize a comparison so that we are more likely to recognize it.
2960 This can be done for a few constant compares, where we can make the
2961 immediate value easier to load. */
2964 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2967 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2968 unsigned HOST_WIDE_INT maxval;
2969 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2980 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2982 *op1 = GEN_INT (i + 1);
2983 return code == GT ? GE : LT;
2990 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2992 *op1 = GEN_INT (i - 1);
2993 return code == GE ? GT : LE;
2999 if (i != ~((unsigned HOST_WIDE_INT) 0)
3000 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3002 *op1 = GEN_INT (i + 1);
3003 return code == GTU ? GEU : LTU;
3010 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3012 *op1 = GEN_INT (i - 1);
3013 return code == GEU ? GTU : LEU;
3025 /* Define how to find the value returned by a function. */
3028 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
3030 enum machine_mode mode;
3031 int unsignedp ATTRIBUTE_UNUSED;
3032 rtx r ATTRIBUTE_UNUSED;
3034 mode = TYPE_MODE (type);
3035 /* Promote integer types. */
3036 if (INTEGRAL_TYPE_P (type))
3037 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
3039 /* Promotes small structs returned in a register to full-word size
3040 for big-endian AAPCS. */
3041 if (arm_return_in_msb (type))
3043 HOST_WIDE_INT size = int_size_in_bytes (type);
3044 if (size % UNITS_PER_WORD != 0)
3046 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3047 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3051 return LIBCALL_VALUE(mode);
3054 /* Determine the amount of memory needed to store the possible return
3055 registers of an untyped call. */
3057 arm_apply_result_size (void)
3063 if (TARGET_HARD_FLOAT_ABI)
3067 if (TARGET_MAVERICK)
3070 if (TARGET_IWMMXT_ABI)
3077 /* Decide whether a type should be returned in memory (true)
3078 or in a register (false). This is called as the target hook
3079 TARGET_RETURN_IN_MEMORY. */
3081 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3085 size = int_size_in_bytes (type);
3087 /* Vector values should be returned using ARM registers, not memory (unless
3088 they're over 16 bytes, which will break since we only have four
3089 call-clobbered registers to play with). */
3090 if (TREE_CODE (type) == VECTOR_TYPE)
3091 return (size < 0 || size > (4 * UNITS_PER_WORD));
3093 if (!AGGREGATE_TYPE_P (type) &&
3094 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
3095 /* All simple types are returned in registers.
3096 For AAPCS, complex types are treated the same as aggregates. */
3099 if (arm_abi != ARM_ABI_APCS)
3101 /* ATPCS and later return aggregate types in memory only if they are
3102 larger than a word (or are variable size). */
3103 return (size < 0 || size > UNITS_PER_WORD);
3106 /* For the arm-wince targets we choose to be compatible with Microsoft's
3107 ARM and Thumb compilers, which always return aggregates in memory. */
3109 /* All structures/unions bigger than one word are returned in memory.
3110 Also catch the case where int_size_in_bytes returns -1. In this case
3111 the aggregate is either huge or of variable size, and in either case
3112 we will want to return it via memory and not in a register. */
3113 if (size < 0 || size > UNITS_PER_WORD)
3116 if (TREE_CODE (type) == RECORD_TYPE)
3120 /* For a struct the APCS says that we only return in a register
3121 if the type is 'integer like' and every addressable element
3122 has an offset of zero. For practical purposes this means
3123 that the structure can have at most one non bit-field element
3124 and that this element must be the first one in the structure. */
3126 /* Find the first field, ignoring non FIELD_DECL things which will
3127 have been created by C++. */
3128 for (field = TYPE_FIELDS (type);
3129 field && TREE_CODE (field) != FIELD_DECL;
3130 field = TREE_CHAIN (field))
3134 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
3136 /* Check that the first field is valid for returning in a register. */
3138 /* ... Floats are not allowed */
3139 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3142 /* ... Aggregates that are not themselves valid for returning in
3143 a register are not allowed. */
3144 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3147 /* Now check the remaining fields, if any. Only bitfields are allowed,
3148 since they are not addressable. */
3149 for (field = TREE_CHAIN (field);
3151 field = TREE_CHAIN (field))
3153 if (TREE_CODE (field) != FIELD_DECL)
3156 if (!DECL_BIT_FIELD_TYPE (field))
3163 if (TREE_CODE (type) == UNION_TYPE)
3167 /* Unions can be returned in registers if every element is
3168 integral, or can be returned in an integer register. */
3169 for (field = TYPE_FIELDS (type);
3171 field = TREE_CHAIN (field))
3173 if (TREE_CODE (field) != FIELD_DECL)
3176 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3179 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3185 #endif /* not ARM_WINCE */
3187 /* Return all other types in memory. */
3191 /* Indicate whether or not words of a double are in big-endian order. */
3194 arm_float_words_big_endian (void)
3196 if (TARGET_MAVERICK)
3199 /* For FPA, float words are always big-endian. For VFP, floats words
3200 follow the memory system mode. */
3208 return (TARGET_BIG_END ? 1 : 0);
3213 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3214 for a call to a function whose data type is FNTYPE.
3215 For a library call, FNTYPE is NULL. */
3217 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
3218 rtx libname ATTRIBUTE_UNUSED,
3219 tree fndecl ATTRIBUTE_UNUSED)
3221 /* On the ARM, the offset starts at 0. */
3223 pcum->iwmmxt_nregs = 0;
3224 pcum->can_split = true;
3226 /* Varargs vectors are treated the same as long long.
3227 named_count avoids having to change the way arm handles 'named' */
3228 pcum->named_count = 0;
3231 if (TARGET_REALLY_IWMMXT && fntype)
3235 for (fn_arg = TYPE_ARG_TYPES (fntype);
3237 fn_arg = TREE_CHAIN (fn_arg))
3238 pcum->named_count += 1;
3240 if (! pcum->named_count)
3241 pcum->named_count = INT_MAX;
3246 /* Return true if mode/type need doubleword alignment. */
3248 arm_needs_doubleword_align (enum machine_mode mode, tree type)
3250 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
3251 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
3255 /* Determine where to put an argument to a function.
3256 Value is zero to push the argument on the stack,
3257 or a hard register in which to store the argument.
3259 MODE is the argument's machine mode.
3260 TYPE is the data type of the argument (as a tree).
3261 This is null for libcalls where that information may
3263 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3264 the preceding args and about the function being called.
3265 NAMED is nonzero if this argument is a named parameter
3266 (otherwise it is an extra parameter matching an ellipsis). */
3269 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3270 tree type, int named)
3274 /* Varargs vectors are treated the same as long long.
3275 named_count avoids having to change the way arm handles 'named' */
3276 if (TARGET_IWMMXT_ABI
3277 && arm_vector_mode_supported_p (mode)
3278 && pcum->named_count > pcum->nargs + 1)
3280 if (pcum->iwmmxt_nregs <= 9)
3281 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
3284 pcum->can_split = false;
3289 /* Put doubleword aligned quantities in even register pairs. */
3291 && ARM_DOUBLEWORD_ALIGN
3292 && arm_needs_doubleword_align (mode, type))
3295 if (mode == VOIDmode)
3296 /* Pick an arbitrary value for operand 2 of the call insn. */
3299 /* Only allow splitting an arg between regs and memory if all preceding
3300 args were allocated to regs. For args passed by reference we only count
3301 the reference pointer. */
3302 if (pcum->can_split)
3305 nregs = ARM_NUM_REGS2 (mode, type);
3307 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
3310 return gen_rtx_REG (mode, pcum->nregs);
3314 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3315 tree type, bool named ATTRIBUTE_UNUSED)
3317 int nregs = pcum->nregs;
3319 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
3322 if (NUM_ARG_REGS > nregs
3323 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
3325 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
3330 /* Variable sized types are passed by reference. This is a GCC
3331 extension to the ARM ABI. */
3334 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3335 enum machine_mode mode ATTRIBUTE_UNUSED,
3336 const_tree type, bool named ATTRIBUTE_UNUSED)
3338 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3341 /* Encode the current state of the #pragma [no_]long_calls. */
3344 OFF, /* No #pragma [no_]long_calls is in effect. */
3345 LONG, /* #pragma long_calls is in effect. */
3346 SHORT /* #pragma no_long_calls is in effect. */
3349 static arm_pragma_enum arm_pragma_long_calls = OFF;
3352 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3354 arm_pragma_long_calls = LONG;
3358 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3360 arm_pragma_long_calls = SHORT;
3364 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3366 arm_pragma_long_calls = OFF;
3369 /* Table of machine attributes. */
3370 const struct attribute_spec arm_attribute_table[] =
3372 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3373 /* Function calls made to this symbol must be done indirectly, because
3374 it may lie outside of the 26 bit addressing range of a normal function
3376 { "long_call", 0, 0, false, true, true, NULL },
3377 /* Whereas these functions are always known to reside within the 26 bit
3378 addressing range. */
3379 { "short_call", 0, 0, false, true, true, NULL },
3380 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3381 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3382 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3383 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3385 /* ARM/PE has three new attributes:
3387 dllexport - for exporting a function/variable that will live in a dll
3388 dllimport - for importing a function/variable from a dll
3390 Microsoft allows multiple declspecs in one __declspec, separating
3391 them with spaces. We do NOT support this. Instead, use __declspec
3394 { "dllimport", 0, 0, true, false, false, NULL },
3395 { "dllexport", 0, 0, true, false, false, NULL },
3396 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3397 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3398 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3399 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3400 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3402 { NULL, 0, 0, false, false, false, NULL }
3405 /* Handle an attribute requiring a FUNCTION_DECL;
3406 arguments as in struct attribute_spec.handler. */
3408 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3409 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3411 if (TREE_CODE (*node) != FUNCTION_DECL)
3413 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3415 *no_add_attrs = true;
3421 /* Handle an "interrupt" or "isr" attribute;
3422 arguments as in struct attribute_spec.handler. */
3424 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3429 if (TREE_CODE (*node) != FUNCTION_DECL)
3431 warning (OPT_Wattributes, "%qE attribute only applies to functions",
3433 *no_add_attrs = true;
3435 /* FIXME: the argument if any is checked for type attributes;
3436 should it be checked for decl ones? */
3440 if (TREE_CODE (*node) == FUNCTION_TYPE
3441 || TREE_CODE (*node) == METHOD_TYPE)
3443 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3445 warning (OPT_Wattributes, "%qE attribute ignored",
3447 *no_add_attrs = true;
3450 else if (TREE_CODE (*node) == POINTER_TYPE
3451 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3452 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3453 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3455 *node = build_variant_type_copy (*node);
3456 TREE_TYPE (*node) = build_type_attribute_variant
3458 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3459 *no_add_attrs = true;
3463 /* Possibly pass this attribute on from the type to a decl. */
3464 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3465 | (int) ATTR_FLAG_FUNCTION_NEXT
3466 | (int) ATTR_FLAG_ARRAY_NEXT))
3468 *no_add_attrs = true;
3469 return tree_cons (name, args, NULL_TREE);
3473 warning (OPT_Wattributes, "%qE attribute ignored",
3482 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3483 /* Handle the "notshared" attribute. This attribute is another way of
3484 requesting hidden visibility. ARM's compiler supports
3485 "__declspec(notshared)"; we support the same thing via an
3489 arm_handle_notshared_attribute (tree *node,
3490 tree name ATTRIBUTE_UNUSED,
3491 tree args ATTRIBUTE_UNUSED,
3492 int flags ATTRIBUTE_UNUSED,
3495 tree decl = TYPE_NAME (*node);
3499 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3500 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3501 *no_add_attrs = false;
3507 /* Return 0 if the attributes for two types are incompatible, 1 if they
3508 are compatible, and 2 if they are nearly compatible (which causes a
3509 warning to be generated). */
3511 arm_comp_type_attributes (const_tree type1, const_tree type2)
3515 /* Check for mismatch of non-default calling convention. */
3516 if (TREE_CODE (type1) != FUNCTION_TYPE)
3519 /* Check for mismatched call attributes. */
3520 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3521 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3522 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3523 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3525 /* Only bother to check if an attribute is defined. */
3526 if (l1 | l2 | s1 | s2)
3528 /* If one type has an attribute, the other must have the same attribute. */
3529 if ((l1 != l2) || (s1 != s2))
3532 /* Disallow mixed attributes. */
3533 if ((l1 & s2) || (l2 & s1))
3537 /* Check for mismatched ISR attribute. */
3538 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3540 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3541 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3543 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3550 /* Assigns default attributes to newly defined type. This is used to
3551 set short_call/long_call attributes for function types of
3552 functions defined inside corresponding #pragma scopes. */
3554 arm_set_default_type_attributes (tree type)
3556 /* Add __attribute__ ((long_call)) to all functions, when
3557 inside #pragma long_calls or __attribute__ ((short_call)),
3558 when inside #pragma no_long_calls. */
3559 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3561 tree type_attr_list, attr_name;
3562 type_attr_list = TYPE_ATTRIBUTES (type);
3564 if (arm_pragma_long_calls == LONG)
3565 attr_name = get_identifier ("long_call");
3566 else if (arm_pragma_long_calls == SHORT)
3567 attr_name = get_identifier ("short_call");
3571 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3572 TYPE_ATTRIBUTES (type) = type_attr_list;
3576 /* Return true if DECL is known to be linked into section SECTION. */
3579 arm_function_in_section_p (tree decl, section *section)
3581 /* We can only be certain about functions defined in the same
3582 compilation unit. */
3583 if (!TREE_STATIC (decl))
3586 /* Make sure that SYMBOL always binds to the definition in this
3587 compilation unit. */
3588 if (!targetm.binds_local_p (decl))
3591 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3592 if (!DECL_SECTION_NAME (decl))
3594 /* Make sure that we will not create a unique section for DECL. */
3595 if (flag_function_sections || DECL_ONE_ONLY (decl))
3599 return function_section (decl) == section;
3602 /* Return nonzero if a 32-bit "long_call" should be generated for
3603 a call from the current function to DECL. We generate a long_call
3606 a. has an __attribute__((long call))
3607 or b. is within the scope of a #pragma long_calls
3608 or c. the -mlong-calls command line switch has been specified
3610 However we do not generate a long call if the function:
3612 d. has an __attribute__ ((short_call))
3613 or e. is inside the scope of a #pragma no_long_calls
3614 or f. is defined in the same section as the current function. */
3617 arm_is_long_call_p (tree decl)
3622 return TARGET_LONG_CALLS;
3624 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3625 if (lookup_attribute ("short_call", attrs))
3628 /* For "f", be conservative, and only cater for cases in which the
3629 whole of the current function is placed in the same section. */
3630 if (!flag_reorder_blocks_and_partition
3631 && TREE_CODE (decl) == FUNCTION_DECL
3632 && arm_function_in_section_p (decl, current_function_section ()))
3635 if (lookup_attribute ("long_call", attrs))
3638 return TARGET_LONG_CALLS;
3641 /* Return nonzero if it is ok to make a tail-call to DECL. */
3643 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3645 unsigned long func_type;
3647 if (cfun->machine->sibcall_blocked)
3650 /* Never tailcall something for which we have no decl, or if we
3651 are in Thumb mode. */
3652 if (decl == NULL || TARGET_THUMB)
3655 /* The PIC register is live on entry to VxWorks PLT entries, so we
3656 must make the call before restoring the PIC register. */
3657 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3660 /* Cannot tail-call to long calls, since these are out of range of
3661 a branch instruction. */
3662 if (arm_is_long_call_p (decl))
3665 /* If we are interworking and the function is not declared static
3666 then we can't tail-call it unless we know that it exists in this
3667 compilation unit (since it might be a Thumb routine). */
3668 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3671 func_type = arm_current_func_type ();
3672 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3673 if (IS_INTERRUPT (func_type))
3676 /* Never tailcall if function may be called with a misaligned SP. */
3677 if (IS_STACKALIGN (func_type))
3680 /* Everything else is ok. */
3685 /* Addressing mode support functions. */
3687 /* Return nonzero if X is a legitimate immediate operand when compiling
3688 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3690 legitimate_pic_operand_p (rtx x)
3692 if (GET_CODE (x) == SYMBOL_REF
3693 || (GET_CODE (x) == CONST
3694 && GET_CODE (XEXP (x, 0)) == PLUS
3695 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3701 /* Record that the current function needs a PIC register. Initialize
3702 cfun->machine->pic_reg if we have not already done so. */
3705 require_pic_register (void)
3707 /* A lot of the logic here is made obscure by the fact that this
3708 routine gets called as part of the rtx cost estimation process.
3709 We don't want those calls to affect any assumptions about the real
3710 function; and further, we can't call entry_of_function() until we
3711 start the real expansion process. */
3712 if (!crtl->uses_pic_offset_table)
3714 gcc_assert (can_create_pseudo_p ());
3715 if (arm_pic_register != INVALID_REGNUM)
3717 if (!cfun->machine->pic_reg)
3718 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3720 /* Play games to avoid marking the function as needing pic
3721 if we are being called as part of the cost-estimation
3723 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3724 crtl->uses_pic_offset_table = 1;
3730 if (!cfun->machine->pic_reg)
3731 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3733 /* Play games to avoid marking the function as needing pic
3734 if we are being called as part of the cost-estimation
3736 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
3738 crtl->uses_pic_offset_table = 1;
3741 arm_load_pic_register (0UL);
3745 /* We can be called during expansion of PHI nodes, where
3746 we can't yet emit instructions directly in the final
3747 insn stream. Queue the insns on the entry edge, they will
3748 be committed after everything else is expanded. */
3749 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
3756 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3758 if (GET_CODE (orig) == SYMBOL_REF
3759 || GET_CODE (orig) == LABEL_REF)
3761 rtx pic_ref, address;
3765 /* If this function doesn't have a pic register, create one now. */
3766 require_pic_register ();
3770 gcc_assert (can_create_pseudo_p ());
3771 reg = gen_reg_rtx (Pmode);
3777 address = gen_reg_rtx (Pmode);
3782 emit_insn (gen_pic_load_addr_arm (address, orig));
3783 else if (TARGET_THUMB2)
3784 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3785 else /* TARGET_THUMB1 */
3786 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3788 /* VxWorks does not impose a fixed gap between segments; the run-time
3789 gap can be different from the object-file gap. We therefore can't
3790 use GOTOFF unless we are absolutely sure that the symbol is in the
3791 same segment as the GOT. Unfortunately, the flexibility of linker
3792 scripts means that we can't be sure of that in general, so assume
3793 that GOTOFF is never valid on VxWorks. */
3794 if ((GET_CODE (orig) == LABEL_REF
3795 || (GET_CODE (orig) == SYMBOL_REF &&
3796 SYMBOL_REF_LOCAL_P (orig)))
3798 && !TARGET_VXWORKS_RTP)
3799 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3802 pic_ref = gen_const_mem (Pmode,
3803 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3807 insn = emit_move_insn (reg, pic_ref);
3809 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3811 set_unique_reg_note (insn, REG_EQUAL, orig);
3815 else if (GET_CODE (orig) == CONST)
3819 if (GET_CODE (XEXP (orig, 0)) == PLUS
3820 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3823 /* Handle the case where we have: const (UNSPEC_TLS). */
3824 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3825 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3828 /* Handle the case where we have:
3829 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
3831 if (GET_CODE (XEXP (orig, 0)) == PLUS
3832 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
3833 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
3835 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
3841 gcc_assert (can_create_pseudo_p ());
3842 reg = gen_reg_rtx (Pmode);
3845 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3847 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3848 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3849 base == reg ? 0 : reg);
3851 if (GET_CODE (offset) == CONST_INT)
3853 /* The base register doesn't really matter, we only want to
3854 test the index for the appropriate mode. */
3855 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3857 gcc_assert (can_create_pseudo_p ());
3858 offset = force_reg (Pmode, offset);
3861 if (GET_CODE (offset) == CONST_INT)
3862 return plus_constant (base, INTVAL (offset));
3865 if (GET_MODE_SIZE (mode) > 4
3866 && (GET_MODE_CLASS (mode) == MODE_INT
3867 || TARGET_SOFT_FLOAT))
3869 emit_insn (gen_addsi3 (reg, base, offset));
3873 return gen_rtx_PLUS (Pmode, base, offset);
3880 /* Find a spare register to use during the prolog of a function. */
3883 thumb_find_work_register (unsigned long pushed_regs_mask)
3887 /* Check the argument registers first as these are call-used. The
3888 register allocation order means that sometimes r3 might be used
3889 but earlier argument registers might not, so check them all. */
3890 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3891 if (!df_regs_ever_live_p (reg))
3894 /* Before going on to check the call-saved registers we can try a couple
3895 more ways of deducing that r3 is available. The first is when we are
3896 pushing anonymous arguments onto the stack and we have less than 4
3897 registers worth of fixed arguments(*). In this case r3 will be part of
3898 the variable argument list and so we can be sure that it will be
3899 pushed right at the start of the function. Hence it will be available
3900 for the rest of the prologue.
3901 (*): ie crtl->args.pretend_args_size is greater than 0. */
3902 if (cfun->machine->uses_anonymous_args
3903 && crtl->args.pretend_args_size > 0)
3904 return LAST_ARG_REGNUM;
3906 /* The other case is when we have fixed arguments but less than 4 registers
3907 worth. In this case r3 might be used in the body of the function, but
3908 it is not being used to convey an argument into the function. In theory
3909 we could just check crtl->args.size to see how many bytes are
3910 being passed in argument registers, but it seems that it is unreliable.
3911 Sometimes it will have the value 0 when in fact arguments are being
3912 passed. (See testcase execute/20021111-1.c for an example). So we also
3913 check the args_info.nregs field as well. The problem with this field is
3914 that it makes no allowances for arguments that are passed to the
3915 function but which are not used. Hence we could miss an opportunity
3916 when a function has an unused argument in r3. But it is better to be
3917 safe than to be sorry. */
3918 if (! cfun->machine->uses_anonymous_args
3919 && crtl->args.size >= 0
3920 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3921 && crtl->args.info.nregs < 4)
3922 return LAST_ARG_REGNUM;
3924 /* Otherwise look for a call-saved register that is going to be pushed. */
3925 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3926 if (pushed_regs_mask & (1 << reg))
3931 /* Thumb-2 can use high regs. */
3932 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3933 if (pushed_regs_mask & (1 << reg))
3936 /* Something went wrong - thumb_compute_save_reg_mask()
3937 should have arranged for a suitable register to be pushed. */
3941 static GTY(()) int pic_labelno;
3943 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3947 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3949 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
3951 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3954 gcc_assert (flag_pic);
3956 pic_reg = cfun->machine->pic_reg;
3957 if (TARGET_VXWORKS_RTP)
3959 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3960 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3961 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3963 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3965 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3966 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3970 /* We use an UNSPEC rather than a LABEL_REF because this label
3971 never appears in the code stream. */
3973 labelno = GEN_INT (pic_labelno++);
3974 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3975 l1 = gen_rtx_CONST (VOIDmode, l1);
3977 /* On the ARM the PC register contains 'dot + 8' at the time of the
3978 addition, on the Thumb it is 'dot + 4'. */
3979 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
3980 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
3982 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3986 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3987 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3989 else if (TARGET_THUMB2)
3991 /* Thumb-2 only allows very limited access to the PC. Calculate the
3992 address in a temporary register. */
3993 if (arm_pic_register != INVALID_REGNUM)
3995 pic_tmp = gen_rtx_REG (SImode,
3996 thumb_find_work_register (saved_regs));
4000 gcc_assert (can_create_pseudo_p ());
4001 pic_tmp = gen_reg_rtx (Pmode);
4004 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
4005 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
4006 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
4008 else /* TARGET_THUMB1 */
4010 if (arm_pic_register != INVALID_REGNUM
4011 && REGNO (pic_reg) > LAST_LO_REGNUM)
4013 /* We will have pushed the pic register, so we should always be
4014 able to find a work register. */
4015 pic_tmp = gen_rtx_REG (SImode,
4016 thumb_find_work_register (saved_regs));
4017 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
4018 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
4021 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
4022 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
4026 /* Need to emit this whether or not we obey regdecls,
4027 since setjmp/longjmp can cause life info to screw up. */
4032 /* Return nonzero if X is valid as an ARM state addressing register. */
4034 arm_address_register_rtx_p (rtx x, int strict_p)
4038 if (GET_CODE (x) != REG)
4044 return ARM_REGNO_OK_FOR_BASE_P (regno);
4046 return (regno <= LAST_ARM_REGNUM
4047 || regno >= FIRST_PSEUDO_REGISTER
4048 || regno == FRAME_POINTER_REGNUM
4049 || regno == ARG_POINTER_REGNUM);
4052 /* Return TRUE if this rtx is the difference of a symbol and a label,
4053 and will reduce to a PC-relative relocation in the object file.
4054 Expressions like this can be left alone when generating PIC, rather
4055 than forced through the GOT. */
4057 pcrel_constant_p (rtx x)
4059 if (GET_CODE (x) == MINUS)
4060 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
4065 /* Return nonzero if X is a valid ARM state address operand. */
4067 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
4071 enum rtx_code code = GET_CODE (x);
4073 if (arm_address_register_rtx_p (x, strict_p))
4076 use_ldrd = (TARGET_LDRD
4078 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4080 if (code == POST_INC || code == PRE_DEC
4081 || ((code == PRE_INC || code == POST_DEC)
4082 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4083 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4085 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4086 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4087 && GET_CODE (XEXP (x, 1)) == PLUS
4088 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4090 rtx addend = XEXP (XEXP (x, 1), 1);
4092 /* Don't allow ldrd post increment by register because it's hard
4093 to fixup invalid register choices. */
4095 && GET_CODE (x) == POST_MODIFY
4096 && GET_CODE (addend) == REG)
4099 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
4100 && arm_legitimate_index_p (mode, addend, outer, strict_p));
4103 /* After reload constants split into minipools will have addresses
4104 from a LABEL_REF. */
4105 else if (reload_completed
4106 && (code == LABEL_REF
4108 && GET_CODE (XEXP (x, 0)) == PLUS
4109 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4110 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4113 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4116 else if (code == PLUS)
4118 rtx xop0 = XEXP (x, 0);
4119 rtx xop1 = XEXP (x, 1);
4121 return ((arm_address_register_rtx_p (xop0, strict_p)
4122 && GET_CODE(xop1) == CONST_INT
4123 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
4124 || (arm_address_register_rtx_p (xop1, strict_p)
4125 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
4129 /* Reload currently can't handle MINUS, so disable this for now */
4130 else if (GET_CODE (x) == MINUS)
4132 rtx xop0 = XEXP (x, 0);
4133 rtx xop1 = XEXP (x, 1);
4135 return (arm_address_register_rtx_p (xop0, strict_p)
4136 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
4140 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4141 && code == SYMBOL_REF
4142 && CONSTANT_POOL_ADDRESS_P (x)
4144 && symbol_mentioned_p (get_pool_constant (x))
4145 && ! pcrel_constant_p (get_pool_constant (x))))
4151 /* Return nonzero if X is a valid Thumb-2 address operand. */
4153 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4156 enum rtx_code code = GET_CODE (x);
4158 if (arm_address_register_rtx_p (x, strict_p))
4161 use_ldrd = (TARGET_LDRD
4163 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
4165 if (code == POST_INC || code == PRE_DEC
4166 || ((code == PRE_INC || code == POST_DEC)
4167 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
4168 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
4170 else if ((code == POST_MODIFY || code == PRE_MODIFY)
4171 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
4172 && GET_CODE (XEXP (x, 1)) == PLUS
4173 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
4175 /* Thumb-2 only has autoincrement by constant. */
4176 rtx addend = XEXP (XEXP (x, 1), 1);
4177 HOST_WIDE_INT offset;
4179 if (GET_CODE (addend) != CONST_INT)
4182 offset = INTVAL(addend);
4183 if (GET_MODE_SIZE (mode) <= 4)
4184 return (offset > -256 && offset < 256);
4186 return (use_ldrd && offset > -1024 && offset < 1024
4187 && (offset & 3) == 0);
4190 /* After reload constants split into minipools will have addresses
4191 from a LABEL_REF. */
4192 else if (reload_completed
4193 && (code == LABEL_REF
4195 && GET_CODE (XEXP (x, 0)) == PLUS
4196 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4197 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4200 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
4203 else if (code == PLUS)
4205 rtx xop0 = XEXP (x, 0);
4206 rtx xop1 = XEXP (x, 1);
4208 return ((arm_address_register_rtx_p (xop0, strict_p)
4209 && thumb2_legitimate_index_p (mode, xop1, strict_p))
4210 || (arm_address_register_rtx_p (xop1, strict_p)
4211 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
4214 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4215 && code == SYMBOL_REF
4216 && CONSTANT_POOL_ADDRESS_P (x)
4218 && symbol_mentioned_p (get_pool_constant (x))
4219 && ! pcrel_constant_p (get_pool_constant (x))))
4225 /* Return nonzero if INDEX is valid for an address index operand in
4228 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
4231 HOST_WIDE_INT range;
4232 enum rtx_code code = GET_CODE (index);
4234 /* Standard coprocessor addressing modes. */
4235 if (TARGET_HARD_FLOAT
4236 && (TARGET_FPA || TARGET_MAVERICK)
4237 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4238 || (TARGET_MAVERICK && mode == DImode)))
4239 return (code == CONST_INT && INTVAL (index) < 1024
4240 && INTVAL (index) > -1024
4241 && (INTVAL (index) & 3) == 0);
4244 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4245 return (code == CONST_INT
4246 && INTVAL (index) < 1016
4247 && INTVAL (index) > -1024
4248 && (INTVAL (index) & 3) == 0);
4250 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4251 return (code == CONST_INT
4252 && INTVAL (index) < 1024
4253 && INTVAL (index) > -1024
4254 && (INTVAL (index) & 3) == 0);
4256 if (arm_address_register_rtx_p (index, strict_p)
4257 && (GET_MODE_SIZE (mode) <= 4))
4260 if (mode == DImode || mode == DFmode)
4262 if (code == CONST_INT)
4264 HOST_WIDE_INT val = INTVAL (index);
4267 return val > -256 && val < 256;
4269 return val > -4096 && val < 4092;
4272 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
4275 if (GET_MODE_SIZE (mode) <= 4
4279 || (mode == QImode && outer == SIGN_EXTEND))))
4283 rtx xiop0 = XEXP (index, 0);
4284 rtx xiop1 = XEXP (index, 1);
4286 return ((arm_address_register_rtx_p (xiop0, strict_p)
4287 && power_of_two_operand (xiop1, SImode))
4288 || (arm_address_register_rtx_p (xiop1, strict_p)
4289 && power_of_two_operand (xiop0, SImode)));
4291 else if (code == LSHIFTRT || code == ASHIFTRT
4292 || code == ASHIFT || code == ROTATERT)
4294 rtx op = XEXP (index, 1);
4296 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4297 && GET_CODE (op) == CONST_INT
4299 && INTVAL (op) <= 31);
4303 /* For ARM v4 we may be doing a sign-extend operation during the
4309 || (outer == SIGN_EXTEND && mode == QImode))
4315 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
4317 return (code == CONST_INT
4318 && INTVAL (index) < range
4319 && INTVAL (index) > -range);
4322 /* Return true if OP is a valid index scaling factor for Thumb-2 address
4323 index operand. i.e. 1, 2, 4 or 8. */
4325 thumb2_index_mul_operand (rtx op)
4329 if (GET_CODE(op) != CONST_INT)
4333 return (val == 1 || val == 2 || val == 4 || val == 8);
4336 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4338 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4340 enum rtx_code code = GET_CODE (index);
4342 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4343 /* Standard coprocessor addressing modes. */
4344 if (TARGET_HARD_FLOAT
4345 && (TARGET_FPA || TARGET_MAVERICK)
4346 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4347 || (TARGET_MAVERICK && mode == DImode)))
4348 return (code == CONST_INT && INTVAL (index) < 1024
4349 && INTVAL (index) > -1024
4350 && (INTVAL (index) & 3) == 0);
4352 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4354 /* For DImode assume values will usually live in core regs
4355 and only allow LDRD addressing modes. */
4356 if (!TARGET_LDRD || mode != DImode)
4357 return (code == CONST_INT
4358 && INTVAL (index) < 1024
4359 && INTVAL (index) > -1024
4360 && (INTVAL (index) & 3) == 0);
4364 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4365 return (code == CONST_INT
4366 && INTVAL (index) < 1016
4367 && INTVAL (index) > -1024
4368 && (INTVAL (index) & 3) == 0);
4370 if (arm_address_register_rtx_p (index, strict_p)
4371 && (GET_MODE_SIZE (mode) <= 4))
4374 if (mode == DImode || mode == DFmode)
4376 if (code == CONST_INT)
4378 HOST_WIDE_INT val = INTVAL (index);
4379 /* ??? Can we assume ldrd for thumb2? */
4380 /* Thumb-2 ldrd only has reg+const addressing modes. */
4381 /* ldrd supports offsets of +-1020.
4382 However the ldr fallback does not. */
4383 return val > -256 && val < 256 && (val & 3) == 0;
4391 rtx xiop0 = XEXP (index, 0);
4392 rtx xiop1 = XEXP (index, 1);
4394 return ((arm_address_register_rtx_p (xiop0, strict_p)
4395 && thumb2_index_mul_operand (xiop1))
4396 || (arm_address_register_rtx_p (xiop1, strict_p)
4397 && thumb2_index_mul_operand (xiop0)));
4399 else if (code == ASHIFT)
4401 rtx op = XEXP (index, 1);
4403 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4404 && GET_CODE (op) == CONST_INT
4406 && INTVAL (op) <= 3);
4409 return (code == CONST_INT
4410 && INTVAL (index) < 4096
4411 && INTVAL (index) > -256);
4414 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4416 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4420 if (GET_CODE (x) != REG)
4426 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4428 return (regno <= LAST_LO_REGNUM
4429 || regno > LAST_VIRTUAL_REGISTER
4430 || regno == FRAME_POINTER_REGNUM
4431 || (GET_MODE_SIZE (mode) >= 4
4432 && (regno == STACK_POINTER_REGNUM
4433 || regno >= FIRST_PSEUDO_REGISTER
4434 || x == hard_frame_pointer_rtx
4435 || x == arg_pointer_rtx)));
4438 /* Return nonzero if x is a legitimate index register. This is the case
4439 for any base register that can access a QImode object. */
4441 thumb1_index_register_rtx_p (rtx x, int strict_p)
4443 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4446 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4448 The AP may be eliminated to either the SP or the FP, so we use the
4449 least common denominator, e.g. SImode, and offsets from 0 to 64.
4451 ??? Verify whether the above is the right approach.
4453 ??? Also, the FP may be eliminated to the SP, so perhaps that
4454 needs special handling also.
4456 ??? Look at how the mips16 port solves this problem. It probably uses
4457 better ways to solve some of these problems.
4459 Although it is not incorrect, we don't accept QImode and HImode
4460 addresses based on the frame pointer or arg pointer until the
4461 reload pass starts. This is so that eliminating such addresses
4462 into stack based ones won't produce impossible code. */
4464 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4466 /* ??? Not clear if this is right. Experiment. */
4467 if (GET_MODE_SIZE (mode) < 4
4468 && !(reload_in_progress || reload_completed)
4469 && (reg_mentioned_p (frame_pointer_rtx, x)
4470 || reg_mentioned_p (arg_pointer_rtx, x)
4471 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4472 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4473 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4474 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4477 /* Accept any base register. SP only in SImode or larger. */
4478 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4481 /* This is PC relative data before arm_reorg runs. */
4482 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4483 && GET_CODE (x) == SYMBOL_REF
4484 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4487 /* This is PC relative data after arm_reorg runs. */
4488 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
4490 && (GET_CODE (x) == LABEL_REF
4491 || (GET_CODE (x) == CONST
4492 && GET_CODE (XEXP (x, 0)) == PLUS
4493 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4494 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4497 /* Post-inc indexing only supported for SImode and larger. */
4498 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4499 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4502 else if (GET_CODE (x) == PLUS)
4504 /* REG+REG address can be any two index registers. */
4505 /* We disallow FRAME+REG addressing since we know that FRAME
4506 will be replaced with STACK, and SP relative addressing only
4507 permits SP+OFFSET. */
4508 if (GET_MODE_SIZE (mode) <= 4
4509 && XEXP (x, 0) != frame_pointer_rtx
4510 && XEXP (x, 1) != frame_pointer_rtx
4511 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4512 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4515 /* REG+const has 5-7 bit offset for non-SP registers. */
4516 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4517 || XEXP (x, 0) == arg_pointer_rtx)
4518 && GET_CODE (XEXP (x, 1)) == CONST_INT
4519 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4522 /* REG+const has 10-bit offset for SP, but only SImode and
4523 larger is supported. */
4524 /* ??? Should probably check for DI/DFmode overflow here
4525 just like GO_IF_LEGITIMATE_OFFSET does. */
4526 else if (GET_CODE (XEXP (x, 0)) == REG
4527 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4528 && GET_MODE_SIZE (mode) >= 4
4529 && GET_CODE (XEXP (x, 1)) == CONST_INT
4530 && INTVAL (XEXP (x, 1)) >= 0
4531 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4532 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4535 else if (GET_CODE (XEXP (x, 0)) == REG
4536 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4537 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4538 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4539 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4540 && GET_MODE_SIZE (mode) >= 4
4541 && GET_CODE (XEXP (x, 1)) == CONST_INT
4542 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4546 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4547 && GET_MODE_SIZE (mode) == 4
4548 && GET_CODE (x) == SYMBOL_REF
4549 && CONSTANT_POOL_ADDRESS_P (x)
4551 && symbol_mentioned_p (get_pool_constant (x))
4552 && ! pcrel_constant_p (get_pool_constant (x))))
4558 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4559 instruction of mode MODE. */
4561 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4563 switch (GET_MODE_SIZE (mode))
4566 return val >= 0 && val < 32;
4569 return val >= 0 && val < 64 && (val & 1) == 0;
4573 && (val + GET_MODE_SIZE (mode)) <= 128
4579 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
4582 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
4583 else if (TARGET_THUMB2)
4584 return thumb2_legitimate_address_p (mode, x, strict_p);
4585 else /* if (TARGET_THUMB1) */
4586 return thumb1_legitimate_address_p (mode, x, strict_p);
4589 /* Build the SYMBOL_REF for __tls_get_addr. */
4591 static GTY(()) rtx tls_get_addr_libfunc;
4594 get_tls_get_addr (void)
4596 if (!tls_get_addr_libfunc)
4597 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4598 return tls_get_addr_libfunc;
4602 arm_load_tp (rtx target)
4605 target = gen_reg_rtx (SImode);
4609 /* Can return in any reg. */
4610 emit_insn (gen_load_tp_hard (target));
4614 /* Always returned in r0. Immediately copy the result into a pseudo,
4615 otherwise other uses of r0 (e.g. setting up function arguments) may
4616 clobber the value. */
4620 emit_insn (gen_load_tp_soft ());
4622 tmp = gen_rtx_REG (SImode, 0);
4623 emit_move_insn (target, tmp);
4629 load_tls_operand (rtx x, rtx reg)
4633 if (reg == NULL_RTX)
4634 reg = gen_reg_rtx (SImode);
4636 tmp = gen_rtx_CONST (SImode, x);
4638 emit_move_insn (reg, tmp);
4644 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4646 rtx insns, label, labelno, sum;
4650 labelno = GEN_INT (pic_labelno++);
4651 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4652 label = gen_rtx_CONST (VOIDmode, label);
4654 sum = gen_rtx_UNSPEC (Pmode,
4655 gen_rtvec (4, x, GEN_INT (reloc), label,
4656 GEN_INT (TARGET_ARM ? 8 : 4)),
4658 reg = load_tls_operand (sum, reg);
4661 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4662 else if (TARGET_THUMB2)
4665 /* Thumb-2 only allows very limited access to the PC. Calculate
4666 the address in a temporary register. */
4667 tmp = gen_reg_rtx (SImode);
4668 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4669 emit_insn (gen_addsi3(reg, reg, tmp));
4671 else /* TARGET_THUMB1 */
4672 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4674 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4675 Pmode, 1, reg, Pmode);
4677 insns = get_insns ();
4684 legitimize_tls_address (rtx x, rtx reg)
4686 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4687 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4691 case TLS_MODEL_GLOBAL_DYNAMIC:
4692 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4693 dest = gen_reg_rtx (Pmode);
4694 emit_libcall_block (insns, dest, ret, x);
4697 case TLS_MODEL_LOCAL_DYNAMIC:
4698 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4700 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4701 share the LDM result with other LD model accesses. */
4702 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4704 dest = gen_reg_rtx (Pmode);
4705 emit_libcall_block (insns, dest, ret, eqv);
4707 /* Load the addend. */
4708 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4710 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4711 return gen_rtx_PLUS (Pmode, dest, addend);
4713 case TLS_MODEL_INITIAL_EXEC:
4714 labelno = GEN_INT (pic_labelno++);
4715 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4716 label = gen_rtx_CONST (VOIDmode, label);
4717 sum = gen_rtx_UNSPEC (Pmode,
4718 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4719 GEN_INT (TARGET_ARM ? 8 : 4)),
4721 reg = load_tls_operand (sum, reg);
4724 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4725 else if (TARGET_THUMB2)
4728 /* Thumb-2 only allows very limited access to the PC. Calculate
4729 the address in a temporary register. */
4730 tmp = gen_reg_rtx (SImode);
4731 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4732 emit_insn (gen_addsi3(reg, reg, tmp));
4733 emit_move_insn (reg, gen_const_mem (SImode, reg));
4737 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4738 emit_move_insn (reg, gen_const_mem (SImode, reg));
4741 tp = arm_load_tp (NULL_RTX);
4743 return gen_rtx_PLUS (Pmode, tp, reg);
4745 case TLS_MODEL_LOCAL_EXEC:
4746 tp = arm_load_tp (NULL_RTX);
4748 reg = gen_rtx_UNSPEC (Pmode,
4749 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4751 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4753 return gen_rtx_PLUS (Pmode, tp, reg);
4760 /* Try machine-dependent ways of modifying an illegitimate address
4761 to be legitimate. If we find one, return the new, valid address. */
4763 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4767 /* TODO: legitimize_address for Thumb2. */
4770 return thumb_legitimize_address (x, orig_x, mode);
4773 if (arm_tls_symbol_p (x))
4774 return legitimize_tls_address (x, NULL_RTX);
4776 if (GET_CODE (x) == PLUS)
4778 rtx xop0 = XEXP (x, 0);
4779 rtx xop1 = XEXP (x, 1);
4781 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4782 xop0 = force_reg (SImode, xop0);
4784 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4785 xop1 = force_reg (SImode, xop1);
4787 if (ARM_BASE_REGISTER_RTX_P (xop0)
4788 && GET_CODE (xop1) == CONST_INT)
4790 HOST_WIDE_INT n, low_n;
4794 /* VFP addressing modes actually allow greater offsets, but for
4795 now we just stick with the lowest common denominator. */
4797 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4809 low_n = ((mode) == TImode ? 0
4810 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4814 base_reg = gen_reg_rtx (SImode);
4815 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4816 emit_move_insn (base_reg, val);
4817 x = plus_constant (base_reg, low_n);
4819 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4820 x = gen_rtx_PLUS (SImode, xop0, xop1);
4823 /* XXX We don't allow MINUS any more -- see comment in
4824 arm_legitimate_address_outer_p (). */
4825 else if (GET_CODE (x) == MINUS)
4827 rtx xop0 = XEXP (x, 0);
4828 rtx xop1 = XEXP (x, 1);
4830 if (CONSTANT_P (xop0))
4831 xop0 = force_reg (SImode, xop0);
4833 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4834 xop1 = force_reg (SImode, xop1);
4836 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4837 x = gen_rtx_MINUS (SImode, xop0, xop1);
4840 /* Make sure to take full advantage of the pre-indexed addressing mode
4841 with absolute addresses which often allows for the base register to
4842 be factorized for multiple adjacent memory references, and it might
4843 even allows for the mini pool to be avoided entirely. */
4844 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4847 HOST_WIDE_INT mask, base, index;
4850 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4851 use a 8-bit index. So let's use a 12-bit index for SImode only and
4852 hope that arm_gen_constant will enable ldrb to use more bits. */
4853 bits = (mode == SImode) ? 12 : 8;
4854 mask = (1 << bits) - 1;
4855 base = INTVAL (x) & ~mask;
4856 index = INTVAL (x) & mask;
4857 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4859 /* It'll most probably be more efficient to generate the base
4860 with more bits set and use a negative index instead. */
4864 base_reg = force_reg (SImode, GEN_INT (base));
4865 x = plus_constant (base_reg, index);
4870 /* We need to find and carefully transform any SYMBOL and LABEL
4871 references; so go back to the original address expression. */
4872 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4874 if (new_x != orig_x)
4882 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4883 to be legitimate. If we find one, return the new, valid address. */
4885 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4887 if (arm_tls_symbol_p (x))
4888 return legitimize_tls_address (x, NULL_RTX);
4890 if (GET_CODE (x) == PLUS
4891 && GET_CODE (XEXP (x, 1)) == CONST_INT
4892 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4893 || INTVAL (XEXP (x, 1)) < 0))
4895 rtx xop0 = XEXP (x, 0);
4896 rtx xop1 = XEXP (x, 1);
4897 HOST_WIDE_INT offset = INTVAL (xop1);
4899 /* Try and fold the offset into a biasing of the base register and
4900 then offsetting that. Don't do this when optimizing for space
4901 since it can cause too many CSEs. */
4902 if (optimize_size && offset >= 0
4903 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4905 HOST_WIDE_INT delta;
4908 delta = offset - (256 - GET_MODE_SIZE (mode));
4909 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4910 delta = 31 * GET_MODE_SIZE (mode);
4912 delta = offset & (~31 * GET_MODE_SIZE (mode));
4914 xop0 = force_operand (plus_constant (xop0, offset - delta),
4916 x = plus_constant (xop0, delta);
4918 else if (offset < 0 && offset > -256)
4919 /* Small negative offsets are best done with a subtract before the
4920 dereference, forcing these into a register normally takes two
4922 x = force_operand (x, NULL_RTX);
4925 /* For the remaining cases, force the constant into a register. */
4926 xop1 = force_reg (SImode, xop1);
4927 x = gen_rtx_PLUS (SImode, xop0, xop1);
4930 else if (GET_CODE (x) == PLUS
4931 && s_register_operand (XEXP (x, 1), SImode)
4932 && !s_register_operand (XEXP (x, 0), SImode))
4934 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4936 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4941 /* We need to find and carefully transform any SYMBOL and LABEL
4942 references; so go back to the original address expression. */
4943 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4945 if (new_x != orig_x)
4953 thumb_legitimize_reload_address (rtx *x_p,
4954 enum machine_mode mode,
4955 int opnum, int type,
4956 int ind_levels ATTRIBUTE_UNUSED)
4960 if (GET_CODE (x) == PLUS
4961 && GET_MODE_SIZE (mode) < 4
4962 && REG_P (XEXP (x, 0))
4963 && XEXP (x, 0) == stack_pointer_rtx
4964 && GET_CODE (XEXP (x, 1)) == CONST_INT
4965 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4970 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4971 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4975 /* If both registers are hi-regs, then it's better to reload the
4976 entire expression rather than each register individually. That
4977 only requires one reload register rather than two. */
4978 if (GET_CODE (x) == PLUS
4979 && REG_P (XEXP (x, 0))
4980 && REG_P (XEXP (x, 1))
4981 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4982 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4987 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4988 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
4995 /* Test for various thread-local symbols. */
4997 /* Return TRUE if X is a thread-local symbol. */
5000 arm_tls_symbol_p (rtx x)
5002 if (! TARGET_HAVE_TLS)
5005 if (GET_CODE (x) != SYMBOL_REF)
5008 return SYMBOL_REF_TLS_MODEL (x) != 0;
5011 /* Helper for arm_tls_referenced_p. */
5014 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
5016 if (GET_CODE (*x) == SYMBOL_REF)
5017 return SYMBOL_REF_TLS_MODEL (*x) != 0;
5019 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5020 TLS offsets, not real symbol references. */
5021 if (GET_CODE (*x) == UNSPEC
5022 && XINT (*x, 1) == UNSPEC_TLS)
5028 /* Return TRUE if X contains any TLS symbol references. */
5031 arm_tls_referenced_p (rtx x)
5033 if (! TARGET_HAVE_TLS)
5036 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
5039 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
5042 arm_cannot_force_const_mem (rtx x)
5046 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
5048 split_const (x, &base, &offset);
5049 if (GET_CODE (base) == SYMBOL_REF
5050 && !offset_within_block_p (base, INTVAL (offset)))
5053 return arm_tls_referenced_p (x);
5056 #define REG_OR_SUBREG_REG(X) \
5057 (GET_CODE (X) == REG \
5058 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
5060 #define REG_OR_SUBREG_RTX(X) \
5061 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
5063 #ifndef COSTS_N_INSNS
5064 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
5067 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
5069 enum machine_mode mode = GET_MODE (x);
5082 return COSTS_N_INSNS (1);
5085 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5088 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
5095 return COSTS_N_INSNS (2) + cycles;
5097 return COSTS_N_INSNS (1) + 16;
5100 return (COSTS_N_INSNS (1)
5101 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
5102 + GET_CODE (SET_DEST (x)) == MEM));
5107 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
5109 if (thumb_shiftable_const (INTVAL (x)))
5110 return COSTS_N_INSNS (2);
5111 return COSTS_N_INSNS (3);
5113 else if ((outer == PLUS || outer == COMPARE)
5114 && INTVAL (x) < 256 && INTVAL (x) > -256)
5116 else if (outer == AND
5117 && INTVAL (x) < 256 && INTVAL (x) >= -256)
5118 return COSTS_N_INSNS (1);
5119 else if (outer == ASHIFT || outer == ASHIFTRT
5120 || outer == LSHIFTRT)
5122 return COSTS_N_INSNS (2);
5128 return COSTS_N_INSNS (3);
5146 /* XXX another guess. */
5147 /* Memory costs quite a lot for the first word, but subsequent words
5148 load at the equivalent of a single insn each. */
5149 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
5150 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5155 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5160 /* XXX still guessing. */
5161 switch (GET_MODE (XEXP (x, 0)))
5164 return (1 + (mode == DImode ? 4 : 0)
5165 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5168 return (4 + (mode == DImode ? 4 : 0)
5169 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5172 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5184 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
5186 enum machine_mode mode = GET_MODE (x);
5187 enum rtx_code subcode;
5189 enum rtx_code code = GET_CODE (x);
5196 /* Memory costs quite a lot for the first word, but subsequent words
5197 load at the equivalent of a single insn each. */
5198 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
5205 if (TARGET_HARD_FLOAT && mode == SFmode)
5206 *total = COSTS_N_INSNS (2);
5207 else if (TARGET_HARD_FLOAT && mode == DFmode)
5208 *total = COSTS_N_INSNS (4);
5210 *total = COSTS_N_INSNS (20);
5214 if (GET_CODE (XEXP (x, 1)) == REG)
5215 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
5216 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5217 *total = rtx_cost (XEXP (x, 1), code, speed);
5223 *total += COSTS_N_INSNS (4);
5228 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
5229 *total += rtx_cost (XEXP (x, 0), code, speed);
5232 *total += COSTS_N_INSNS (3);
5236 *total += COSTS_N_INSNS (1);
5237 /* Increase the cost of complex shifts because they aren't any faster,
5238 and reduce dual issue opportunities. */
5239 if (arm_tune_cortex_a9
5240 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
5248 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5250 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5251 *total = COSTS_N_INSNS (1);
5253 *total = COSTS_N_INSNS (20);
5256 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5257 /* Thumb2 does not have RSB, so all arguments must be
5258 registers (subtracting a constant is canonicalized as
5259 addition of the negated constant). */
5265 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5266 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5267 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5269 *total += rtx_cost (XEXP (x, 1), code, speed);
5273 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5274 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
5276 *total += rtx_cost (XEXP (x, 0), code, speed);
5283 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5285 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5287 *total = COSTS_N_INSNS (1);
5288 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5289 && arm_const_double_rtx (XEXP (x, 0)))
5291 *total += rtx_cost (XEXP (x, 1), code, speed);
5295 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5296 && arm_const_double_rtx (XEXP (x, 1)))
5298 *total += rtx_cost (XEXP (x, 0), code, speed);
5304 *total = COSTS_N_INSNS (20);
5308 *total = COSTS_N_INSNS (1);
5309 if (GET_CODE (XEXP (x, 0)) == CONST_INT
5310 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
5312 *total += rtx_cost (XEXP (x, 1), code, speed);
5316 subcode = GET_CODE (XEXP (x, 1));
5317 if (subcode == ASHIFT || subcode == ASHIFTRT
5318 || subcode == LSHIFTRT
5319 || subcode == ROTATE || subcode == ROTATERT)
5321 *total += rtx_cost (XEXP (x, 0), code, speed);
5322 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5326 /* A shift as a part of RSB costs no more than RSB itself. */
5327 if (GET_CODE (XEXP (x, 0)) == MULT
5328 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5330 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
5331 *total += rtx_cost (XEXP (x, 1), code, speed);
5336 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
5338 *total += rtx_cost (XEXP (x, 0), code, speed);
5339 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
5343 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
5344 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
5346 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5347 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
5348 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
5349 *total += COSTS_N_INSNS (1);
5357 if (code == PLUS && arm_arch6 && mode == SImode
5358 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5359 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5361 *total = COSTS_N_INSNS (1);
5362 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
5364 *total += rtx_cost (XEXP (x, 1), code, speed);
5368 /* MLA: All arguments must be registers. We filter out
5369 multiplication by a power of two, so that we fall down into
5371 if (GET_CODE (XEXP (x, 0)) == MULT
5372 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5374 /* The cost comes from the cost of the multiply. */
5378 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5380 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5382 *total = COSTS_N_INSNS (1);
5383 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
5384 && arm_const_double_rtx (XEXP (x, 1)))
5386 *total += rtx_cost (XEXP (x, 0), code, speed);
5393 *total = COSTS_N_INSNS (20);
5397 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
5398 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
5400 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
5401 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5402 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
5403 *total += COSTS_N_INSNS (1);
5409 case AND: case XOR: case IOR:
5412 /* Normally the frame registers will be spilt into reg+const during
5413 reload, so it is a bad idea to combine them with other instructions,
5414 since then they might not be moved outside of loops. As a compromise
5415 we allow integration with ops that have a constant as their second
5417 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
5418 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
5419 && GET_CODE (XEXP (x, 1)) != CONST_INT)
5420 || (REG_OR_SUBREG_REG (XEXP (x, 0))
5421 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
5426 *total += COSTS_N_INSNS (2);
5427 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5428 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5430 *total += rtx_cost (XEXP (x, 0), code, speed);
5437 *total += COSTS_N_INSNS (1);
5438 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5439 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5441 *total += rtx_cost (XEXP (x, 0), code, speed);
5444 subcode = GET_CODE (XEXP (x, 0));
5445 if (subcode == ASHIFT || subcode == ASHIFTRT
5446 || subcode == LSHIFTRT
5447 || subcode == ROTATE || subcode == ROTATERT)
5449 *total += rtx_cost (XEXP (x, 1), code, speed);
5450 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5455 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5457 *total += rtx_cost (XEXP (x, 1), code, speed);
5458 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5462 if (subcode == UMIN || subcode == UMAX
5463 || subcode == SMIN || subcode == SMAX)
5465 *total = COSTS_N_INSNS (3);
5472 /* This should have been handled by the CPU specific routines. */
5476 if (arm_arch3m && mode == SImode
5477 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5478 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5479 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
5480 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
5481 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5482 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
5484 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
5487 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
5491 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5493 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5495 *total = COSTS_N_INSNS (1);
5498 *total = COSTS_N_INSNS (2);
5504 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
5505 if (mode == SImode && code == NOT)
5507 subcode = GET_CODE (XEXP (x, 0));
5508 if (subcode == ASHIFT || subcode == ASHIFTRT
5509 || subcode == LSHIFTRT
5510 || subcode == ROTATE || subcode == ROTATERT
5512 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
5514 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5515 /* Register shifts cost an extra cycle. */
5516 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
5517 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
5526 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5528 *total = COSTS_N_INSNS (4);
5532 operand = XEXP (x, 0);
5534 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
5535 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
5536 && GET_CODE (XEXP (operand, 0)) == REG
5537 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
5538 *total += COSTS_N_INSNS (1);
5539 *total += (rtx_cost (XEXP (x, 1), code, speed)
5540 + rtx_cost (XEXP (x, 2), code, speed));
5544 if (mode == SImode && XEXP (x, 1) == const0_rtx)
5546 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5552 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5553 && mode == SImode && XEXP (x, 1) == const0_rtx)
5555 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5561 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
5562 && mode == SImode && XEXP (x, 1) == const0_rtx)
5564 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5584 /* SCC insns. In the case where the comparison has already been
5585 performed, then they cost 2 instructions. Otherwise they need
5586 an additional comparison before them. */
5587 *total = COSTS_N_INSNS (2);
5588 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5595 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
5601 *total += COSTS_N_INSNS (1);
5602 if (GET_CODE (XEXP (x, 1)) == CONST_INT
5603 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
5605 *total += rtx_cost (XEXP (x, 0), code, speed);
5609 subcode = GET_CODE (XEXP (x, 0));
5610 if (subcode == ASHIFT || subcode == ASHIFTRT
5611 || subcode == LSHIFTRT
5612 || subcode == ROTATE || subcode == ROTATERT)
5614 *total += rtx_cost (XEXP (x, 1), code, speed);
5615 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5620 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5622 *total += rtx_cost (XEXP (x, 1), code, speed);
5623 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
5633 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
5634 if (GET_CODE (XEXP (x, 1)) != CONST_INT
5635 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
5636 *total += rtx_cost (XEXP (x, 1), code, speed);
5640 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5642 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
5644 *total = COSTS_N_INSNS (1);
5647 *total = COSTS_N_INSNS (20);
5650 *total = COSTS_N_INSNS (1);
5652 *total += COSTS_N_INSNS (3);
5656 if (GET_MODE_CLASS (mode) == MODE_INT)
5660 *total += COSTS_N_INSNS (1);
5662 if (GET_MODE (XEXP (x, 0)) != SImode)
5666 if (GET_CODE (XEXP (x, 0)) != MEM)
5667 *total += COSTS_N_INSNS (1);
5669 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5670 *total += COSTS_N_INSNS (2);
5679 if (GET_MODE_CLASS (mode) == MODE_INT)
5682 *total += COSTS_N_INSNS (1);
5684 if (GET_MODE (XEXP (x, 0)) != SImode)
5688 if (GET_CODE (XEXP (x, 0)) != MEM)
5689 *total += COSTS_N_INSNS (1);
5691 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
5692 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
5699 switch (GET_MODE (XEXP (x, 0)))
5706 *total = COSTS_N_INSNS (1);
5716 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
5720 if (const_ok_for_arm (INTVAL (x))
5721 || const_ok_for_arm (~INTVAL (x)))
5722 *total = COSTS_N_INSNS (1);
5724 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
5725 INTVAL (x), NULL_RTX,
5732 *total = COSTS_N_INSNS (3);
5736 *total = COSTS_N_INSNS (1);
5740 *total = COSTS_N_INSNS (1);
5741 *total += rtx_cost (XEXP (x, 0), code, speed);
5745 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
5746 *total = COSTS_N_INSNS (1);
5748 *total = COSTS_N_INSNS (4);
5752 *total = COSTS_N_INSNS (4);
5757 /* RTX costs when optimizing for size. */
5759 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
5762 enum machine_mode mode = GET_MODE (x);
5765 /* XXX TBD. For now, use the standard costs. */
5766 *total = thumb1_rtx_costs (x, code, outer_code);
5770 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5774 /* A memory access costs 1 insn if the mode is small, or the address is
5775 a single register, otherwise it costs one insn per word. */
5776 if (REG_P (XEXP (x, 0)))
5777 *total = COSTS_N_INSNS (1);
5779 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5786 /* Needs a libcall, so it costs about this. */
5787 *total = COSTS_N_INSNS (2);
5791 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5793 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
5801 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5803 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
5806 else if (mode == SImode)
5808 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
5809 /* Slightly disparage register shifts, but not by much. */
5810 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5811 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
5815 /* Needs a libcall. */
5816 *total = COSTS_N_INSNS (2);
5820 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5822 *total = COSTS_N_INSNS (1);
5828 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5829 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5831 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5832 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5833 || subcode1 == ROTATE || subcode1 == ROTATERT
5834 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5835 || subcode1 == ASHIFTRT)
5837 /* It's just the cost of the two operands. */
5842 *total = COSTS_N_INSNS (1);
5846 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5850 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5852 *total = COSTS_N_INSNS (1);
5856 /* A shift as a part of ADD costs nothing. */
5857 if (GET_CODE (XEXP (x, 0)) == MULT
5858 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
5860 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
5861 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
5862 *total += rtx_cost (XEXP (x, 1), code, false);
5867 case AND: case XOR: case IOR:
5870 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5872 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5873 || subcode == LSHIFTRT || subcode == ASHIFTRT
5874 || (code == AND && subcode == NOT))
5876 /* It's just the cost of the two operands. */
5882 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5886 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5890 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5892 *total = COSTS_N_INSNS (1);
5898 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5907 if (cc_register (XEXP (x, 0), VOIDmode))
5910 *total = COSTS_N_INSNS (1);
5914 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5915 *total = COSTS_N_INSNS (1);
5917 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5922 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5924 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5925 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5928 *total += COSTS_N_INSNS (1);
5933 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5935 switch (GET_MODE (XEXP (x, 0)))
5938 *total += COSTS_N_INSNS (1);
5942 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5948 *total += COSTS_N_INSNS (2);
5953 *total += COSTS_N_INSNS (1);
5958 if (const_ok_for_arm (INTVAL (x)))
5959 /* A multiplication by a constant requires another instruction
5960 to load the constant to a register. */
5961 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
5963 else if (const_ok_for_arm (~INTVAL (x)))
5964 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5965 else if (const_ok_for_arm (-INTVAL (x)))
5967 if (outer_code == COMPARE || outer_code == PLUS
5968 || outer_code == MINUS)
5971 *total = COSTS_N_INSNS (1);
5974 *total = COSTS_N_INSNS (2);
5980 *total = COSTS_N_INSNS (2);
5984 *total = COSTS_N_INSNS (4);
5989 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
5990 cost of these slightly. */
5991 *total = COSTS_N_INSNS (1) + 1;
5995 if (mode != VOIDmode)
5996 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5998 *total = COSTS_N_INSNS (4); /* How knows? */
6003 /* RTX costs when optimizing for size. */
6005 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
6009 return arm_size_rtx_costs (x, (enum rtx_code) code,
6010 (enum rtx_code) outer_code, total);
6012 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
6013 (enum rtx_code) outer_code,
6017 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
6018 supported on any "slowmul" cores, so it can be ignored. */
6021 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6022 int *total, bool speed)
6024 enum machine_mode mode = GET_MODE (x);
6028 *total = thumb1_rtx_costs (x, code, outer_code);
6035 if (GET_MODE_CLASS (mode) == MODE_FLOAT
6038 *total = COSTS_N_INSNS (20);
6042 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6044 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
6045 & (unsigned HOST_WIDE_INT) 0xffffffff);
6046 int cost, const_ok = const_ok_for_arm (i);
6047 int j, booth_unit_size;
6049 /* Tune as appropriate. */
6050 cost = const_ok ? 4 : 8;
6051 booth_unit_size = 2;
6052 for (j = 0; i && j < 32; j += booth_unit_size)
6054 i >>= booth_unit_size;
6058 *total = COSTS_N_INSNS (cost);
6059 *total += rtx_cost (XEXP (x, 0), code, speed);
6063 *total = COSTS_N_INSNS (20);
6067 return arm_rtx_costs_1 (x, outer_code, total, speed);;
6072 /* RTX cost for cores with a fast multiply unit (M variants). */
6075 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6076 int *total, bool speed)
6078 enum machine_mode mode = GET_MODE (x);
6082 *total = thumb1_rtx_costs (x, code, outer_code);
6086 /* ??? should thumb2 use different costs? */
6090 /* There is no point basing this on the tuning, since it is always the
6091 fast variant if it exists at all. */
6093 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6094 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6095 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6097 *total = COSTS_N_INSNS(2);
6104 *total = COSTS_N_INSNS (5);
6108 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6110 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
6111 & (unsigned HOST_WIDE_INT) 0xffffffff);
6112 int cost, const_ok = const_ok_for_arm (i);
6113 int j, booth_unit_size;
6115 /* Tune as appropriate. */
6116 cost = const_ok ? 4 : 8;
6117 booth_unit_size = 8;
6118 for (j = 0; i && j < 32; j += booth_unit_size)
6120 i >>= booth_unit_size;
6124 *total = COSTS_N_INSNS(cost);
6130 *total = COSTS_N_INSNS (4);
6134 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6136 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6138 *total = COSTS_N_INSNS (1);
6143 /* Requires a lib call */
6144 *total = COSTS_N_INSNS (20);
6148 return arm_rtx_costs_1 (x, outer_code, total, speed);
6153 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
6154 so it can be ignored. */
6157 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
6159 enum machine_mode mode = GET_MODE (x);
6163 *total = thumb1_rtx_costs (x, code, outer_code);
6170 if (GET_CODE (XEXP (x, 0)) != MULT)
6171 return arm_rtx_costs_1 (x, outer_code, total, speed);
6173 /* A COMPARE of a MULT is slow on XScale; the muls instruction
6174 will stall until the multiplication is complete. */
6175 *total = COSTS_N_INSNS (3);
6179 /* There is no point basing this on the tuning, since it is always the
6180 fast variant if it exists at all. */
6182 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6183 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6184 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6186 *total = COSTS_N_INSNS (2);
6193 *total = COSTS_N_INSNS (5);
6197 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6199 /* If operand 1 is a constant we can more accurately
6200 calculate the cost of the multiply. The multiplier can
6201 retire 15 bits on the first cycle and a further 12 on the
6202 second. We do, of course, have to load the constant into
6203 a register first. */
6204 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6205 /* There's a general overhead of one cycle. */
6207 unsigned HOST_WIDE_INT masked_const;
6212 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
6214 masked_const = i & 0xffff8000;
6215 if (masked_const != 0)
6218 masked_const = i & 0xf8000000;
6219 if (masked_const != 0)
6222 *total = COSTS_N_INSNS (cost);
6228 *total = COSTS_N_INSNS (3);
6232 /* Requires a lib call */
6233 *total = COSTS_N_INSNS (20);
6237 return arm_rtx_costs_1 (x, outer_code, total, speed);
6242 /* RTX costs for 9e (and later) cores. */
6245 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6246 int *total, bool speed)
6248 enum machine_mode mode = GET_MODE (x);
6255 *total = COSTS_N_INSNS (3);
6259 *total = thumb1_rtx_costs (x, code, outer_code);
6267 /* There is no point basing this on the tuning, since it is always the
6268 fast variant if it exists at all. */
6270 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
6271 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6272 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6274 *total = COSTS_N_INSNS (2);
6281 *total = COSTS_N_INSNS (5);
6287 *total = COSTS_N_INSNS (2);
6291 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6293 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6295 *total = COSTS_N_INSNS (1);
6300 *total = COSTS_N_INSNS (20);
6304 return arm_rtx_costs_1 (x, outer_code, total, speed);
6307 /* All address computations that can be done are free, but rtx cost returns
6308 the same for practically all of them. So we weight the different types
6309 of address here in the order (most pref first):
6310 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
6312 arm_arm_address_cost (rtx x)
6314 enum rtx_code c = GET_CODE (x);
6316 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
6318 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
6321 if (c == PLUS || c == MINUS)
6323 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6326 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
6336 arm_thumb_address_cost (rtx x)
6338 enum rtx_code c = GET_CODE (x);
6343 && GET_CODE (XEXP (x, 0)) == REG
6344 && GET_CODE (XEXP (x, 1)) == CONST_INT)
6351 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
6353 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
6357 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
6361 /* Some true dependencies can have a higher cost depending
6362 on precisely how certain input operands are used. */
6364 && REG_NOTE_KIND (link) == 0
6365 && recog_memoized (insn) >= 0
6366 && recog_memoized (dep) >= 0)
6368 int shift_opnum = get_attr_shift (insn);
6369 enum attr_type attr_type = get_attr_type (dep);
6371 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
6372 operand for INSN. If we have a shifted input operand and the
6373 instruction we depend on is another ALU instruction, then we may
6374 have to account for an additional stall. */
6375 if (shift_opnum != 0
6376 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
6378 rtx shifted_operand;
6381 /* Get the shifted operand. */
6382 extract_insn (insn);
6383 shifted_operand = recog_data.operand[shift_opnum];
6385 /* Iterate over all the operands in DEP. If we write an operand
6386 that overlaps with SHIFTED_OPERAND, then we have increase the
6387 cost of this dependency. */
6389 preprocess_constraints ();
6390 for (opno = 0; opno < recog_data.n_operands; opno++)
6392 /* We can ignore strict inputs. */
6393 if (recog_data.operand_type[opno] == OP_IN)
6396 if (reg_overlap_mentioned_p (recog_data.operand[opno],
6403 /* XXX This is not strictly true for the FPA. */
6404 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
6405 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
6408 /* Call insns don't incur a stall, even if they follow a load. */
6409 if (REG_NOTE_KIND (link) == 0
6410 && GET_CODE (insn) == CALL_INSN)
6413 if ((i_pat = single_set (insn)) != NULL
6414 && GET_CODE (SET_SRC (i_pat)) == MEM
6415 && (d_pat = single_set (dep)) != NULL
6416 && GET_CODE (SET_DEST (d_pat)) == MEM)
6418 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
6419 /* This is a load after a store, there is no conflict if the load reads
6420 from a cached area. Assume that loads from the stack, and from the
6421 constant pool are cached, and that others will miss. This is a
6424 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
6425 || reg_mentioned_p (stack_pointer_rtx, src_mem)
6426 || reg_mentioned_p (frame_pointer_rtx, src_mem)
6427 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
6434 static int fp_consts_inited = 0;
6436 /* Only zero is valid for VFP. Other values are also valid for FPA. */
6437 static const char * const strings_fp[8] =
6440 "4", "5", "0.5", "10"
6443 static REAL_VALUE_TYPE values_fp[8];
6446 init_fp_table (void)
6452 fp_consts_inited = 1;
6454 fp_consts_inited = 8;
6456 for (i = 0; i < fp_consts_inited; i++)
6458 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
6463 /* Return TRUE if rtx X is a valid immediate FP constant. */
6465 arm_const_double_rtx (rtx x)
6470 if (!fp_consts_inited)
6473 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6474 if (REAL_VALUE_MINUS_ZERO (r))
6477 for (i = 0; i < fp_consts_inited; i++)
6478 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6484 /* Return TRUE if rtx X is a valid immediate FPA constant. */
6486 neg_const_double_rtx_ok_for_fpa (rtx x)
6491 if (!fp_consts_inited)
6494 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6495 r = REAL_VALUE_NEGATE (r);
6496 if (REAL_VALUE_MINUS_ZERO (r))
6499 for (i = 0; i < 8; i++)
6500 if (REAL_VALUES_EQUAL (r, values_fp[i]))
6507 /* VFPv3 has a fairly wide range of representable immediates, formed from
6508 "quarter-precision" floating-point values. These can be evaluated using this
6509 formula (with ^ for exponentiation):
6513 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
6514 16 <= n <= 31 and 0 <= r <= 7.
6516 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
6518 - A (most-significant) is the sign bit.
6519 - BCD are the exponent (encoded as r XOR 3).
6520 - EFGH are the mantissa (encoded as n - 16).
6523 /* Return an integer index for a VFPv3 immediate operand X suitable for the
6524 fconst[sd] instruction, or -1 if X isn't suitable. */
6526 vfp3_const_double_index (rtx x)
6528 REAL_VALUE_TYPE r, m;
6530 unsigned HOST_WIDE_INT mantissa, mant_hi;
6531 unsigned HOST_WIDE_INT mask;
6532 HOST_WIDE_INT m1, m2;
6533 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
6535 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
6538 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6540 /* We can't represent these things, so detect them first. */
6541 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
6544 /* Extract sign, exponent and mantissa. */
6545 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
6546 r = REAL_VALUE_ABS (r);
6547 exponent = REAL_EXP (&r);
6548 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
6549 highest (sign) bit, with a fixed binary point at bit point_pos.
6550 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
6551 bits for the mantissa, this may fail (low bits would be lost). */
6552 real_ldexp (&m, &r, point_pos - exponent);
6553 REAL_VALUE_TO_INT (&m1, &m2, m);
6557 /* If there are bits set in the low part of the mantissa, we can't
6558 represent this value. */
6562 /* Now make it so that mantissa contains the most-significant bits, and move
6563 the point_pos to indicate that the least-significant bits have been
6565 point_pos -= HOST_BITS_PER_WIDE_INT;
6568 /* We can permit four significant bits of mantissa only, plus a high bit
6569 which is always 1. */
6570 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
6571 if ((mantissa & mask) != 0)
6574 /* Now we know the mantissa is in range, chop off the unneeded bits. */
6575 mantissa >>= point_pos - 5;
6577 /* The mantissa may be zero. Disallow that case. (It's possible to load the
6578 floating-point immediate zero with Neon using an integer-zero load, but
6579 that case is handled elsewhere.) */
6583 gcc_assert (mantissa >= 16 && mantissa <= 31);
6585 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
6586 normalized significands are in the range [1, 2). (Our mantissa is shifted
6587 left 4 places at this point relative to normalized IEEE754 values). GCC
6588 internally uses [0.5, 1) (see real.c), so the exponent returned from
6589 REAL_EXP must be altered. */
6590 exponent = 5 - exponent;
6592 if (exponent < 0 || exponent > 7)
6595 /* Sign, mantissa and exponent are now in the correct form to plug into the
6596 formula described in the comment above. */
6597 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
6600 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
6602 vfp3_const_double_rtx (rtx x)
6607 return vfp3_const_double_index (x) != -1;
6610 /* Recognize immediates which can be used in various Neon instructions. Legal
6611 immediates are described by the following table (for VMVN variants, the
6612 bitwise inverse of the constant shown is recognized. In either case, VMOV
6613 is output and the correct instruction to use for a given constant is chosen
6614 by the assembler). The constant shown is replicated across all elements of
6615 the destination vector.
6617 insn elems variant constant (binary)
6618 ---- ----- ------- -----------------
6619 vmov i32 0 00000000 00000000 00000000 abcdefgh
6620 vmov i32 1 00000000 00000000 abcdefgh 00000000
6621 vmov i32 2 00000000 abcdefgh 00000000 00000000
6622 vmov i32 3 abcdefgh 00000000 00000000 00000000
6623 vmov i16 4 00000000 abcdefgh
6624 vmov i16 5 abcdefgh 00000000
6625 vmvn i32 6 00000000 00000000 00000000 abcdefgh
6626 vmvn i32 7 00000000 00000000 abcdefgh 00000000
6627 vmvn i32 8 00000000 abcdefgh 00000000 00000000
6628 vmvn i32 9 abcdefgh 00000000 00000000 00000000
6629 vmvn i16 10 00000000 abcdefgh
6630 vmvn i16 11 abcdefgh 00000000
6631 vmov i32 12 00000000 00000000 abcdefgh 11111111
6632 vmvn i32 13 00000000 00000000 abcdefgh 11111111
6633 vmov i32 14 00000000 abcdefgh 11111111 11111111
6634 vmvn i32 15 00000000 abcdefgh 11111111 11111111
6636 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
6637 eeeeeeee ffffffff gggggggg hhhhhhhh
6638 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
6640 For case 18, B = !b. Representable values are exactly those accepted by
6641 vfp3_const_double_index, but are output as floating-point numbers rather
6644 Variants 0-5 (inclusive) may also be used as immediates for the second
6645 operand of VORR/VBIC instructions.
6647 The INVERSE argument causes the bitwise inverse of the given operand to be
6648 recognized instead (used for recognizing legal immediates for the VAND/VORN
6649 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
6650 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
6651 output, rather than the real insns vbic/vorr).
6653 INVERSE makes no difference to the recognition of float vectors.
6655 The return value is the variant of immediate as shown in the above table, or
6656 -1 if the given value doesn't match any of the listed patterns.
6659 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6660 rtx *modconst, int *elementwidth)
6662 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
6664 for (i = 0; i < idx; i += (STRIDE)) \
6669 immtype = (CLASS); \
6670 elsize = (ELSIZE); \
6674 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6675 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6676 unsigned char bytes[16];
6677 int immtype = -1, matches;
6678 unsigned int invmask = inverse ? 0xff : 0;
6680 /* Vectors of float constants. */
6681 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6683 rtx el0 = CONST_VECTOR_ELT (op, 0);
6686 if (!vfp3_const_double_rtx (el0))
6689 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
6691 for (i = 1; i < n_elts; i++)
6693 rtx elt = CONST_VECTOR_ELT (op, i);
6696 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
6698 if (!REAL_VALUES_EQUAL (r0, re))
6703 *modconst = CONST_VECTOR_ELT (op, 0);
6711 /* Splat vector constant out into a byte vector. */
6712 for (i = 0; i < n_elts; i++)
6714 rtx el = CONST_VECTOR_ELT (op, i);
6715 unsigned HOST_WIDE_INT elpart;
6716 unsigned int part, parts;
6718 if (GET_CODE (el) == CONST_INT)
6720 elpart = INTVAL (el);
6723 else if (GET_CODE (el) == CONST_DOUBLE)
6725 elpart = CONST_DOUBLE_LOW (el);
6731 for (part = 0; part < parts; part++)
6734 for (byte = 0; byte < innersize; byte++)
6736 bytes[idx++] = (elpart & 0xff) ^ invmask;
6737 elpart >>= BITS_PER_UNIT;
6739 if (GET_CODE (el) == CONST_DOUBLE)
6740 elpart = CONST_DOUBLE_HIGH (el);
6745 gcc_assert (idx == GET_MODE_SIZE (mode));
6749 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6750 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6752 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6753 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6755 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6756 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6758 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6759 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6761 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6763 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6765 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6766 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6768 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6769 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6771 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6772 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6774 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6775 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6777 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6779 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6781 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6782 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6784 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6785 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6787 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6788 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6790 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6791 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6793 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6795 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6796 && bytes[i] == bytes[(i + 8) % idx]);
6804 *elementwidth = elsize;
6808 unsigned HOST_WIDE_INT imm = 0;
6810 /* Un-invert bytes of recognized vector, if necessary. */
6812 for (i = 0; i < idx; i++)
6813 bytes[i] ^= invmask;
6817 /* FIXME: Broken on 32-bit H_W_I hosts. */
6818 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6820 for (i = 0; i < 8; i++)
6821 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6822 << (i * BITS_PER_UNIT);
6824 *modconst = GEN_INT (imm);
6828 unsigned HOST_WIDE_INT imm = 0;
6830 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6831 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6833 *modconst = GEN_INT (imm);
6841 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6842 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6843 float elements), and a modified constant (whatever should be output for a
6844 VMOV) in *MODCONST. */
6847 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6848 rtx *modconst, int *elementwidth)
6852 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6858 *modconst = tmpconst;
6861 *elementwidth = tmpwidth;
6866 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6867 the immediate is valid, write a constant suitable for using as an operand
6868 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6869 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6872 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6873 rtx *modconst, int *elementwidth)
6877 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6879 if (retval < 0 || retval > 5)
6883 *modconst = tmpconst;
6886 *elementwidth = tmpwidth;
6891 /* Return a string suitable for output of Neon immediate logic operation
6895 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6896 int inverse, int quad)
6898 int width, is_valid;
6899 static char templ[40];
6901 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6903 gcc_assert (is_valid != 0);
6906 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6908 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6913 /* Output a sequence of pairwise operations to implement a reduction.
6914 NOTE: We do "too much work" here, because pairwise operations work on two
6915 registers-worth of operands in one go. Unfortunately we can't exploit those
6916 extra calculations to do the full operation in fewer steps, I don't think.
6917 Although all vector elements of the result but the first are ignored, we
6918 actually calculate the same result in each of the elements. An alternative
6919 such as initially loading a vector with zero to use as each of the second
6920 operands would use up an additional register and take an extra instruction,
6921 for no particular gain. */
6924 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6925 rtx (*reduc) (rtx, rtx, rtx))
6927 enum machine_mode inner = GET_MODE_INNER (mode);
6928 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6931 for (i = parts / 2; i >= 1; i /= 2)
6933 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6934 emit_insn (reduc (dest, tmpsum, tmpsum));
6939 /* Initialize a vector with non-constant elements. FIXME: We can do better
6940 than the current implementation (building a vector on the stack and then
6941 loading it) in many cases. See rs6000.c. */
6944 neon_expand_vector_init (rtx target, rtx vals)
6946 enum machine_mode mode = GET_MODE (target);
6947 enum machine_mode inner = GET_MODE_INNER (mode);
6948 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6951 gcc_assert (VECTOR_MODE_P (mode));
6953 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6954 for (i = 0; i < n_elts; i++)
6955 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6956 XVECEXP (vals, 0, i));
6958 emit_move_insn (target, mem);
6961 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6962 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6963 reported source locations are bogus. */
6966 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6971 gcc_assert (GET_CODE (operand) == CONST_INT);
6973 lane = INTVAL (operand);
6975 if (lane < low || lane >= high)
6979 /* Bounds-check lanes. */
6982 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6984 bounds_check (operand, low, high, "lane out of range");
6987 /* Bounds-check constants. */
6990 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6992 bounds_check (operand, low, high, "constant out of range");
6996 neon_element_bits (enum machine_mode mode)
6999 return GET_MODE_BITSIZE (mode);
7001 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
7005 /* Predicates for `match_operand' and `match_operator'. */
7007 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
7009 cirrus_memory_offset (rtx op)
7011 /* Reject eliminable registers. */
7012 if (! (reload_in_progress || reload_completed)
7013 && ( reg_mentioned_p (frame_pointer_rtx, op)
7014 || reg_mentioned_p (arg_pointer_rtx, op)
7015 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7016 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7017 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7018 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7021 if (GET_CODE (op) == MEM)
7027 /* Match: (mem (reg)). */
7028 if (GET_CODE (ind) == REG)
7034 if (GET_CODE (ind) == PLUS
7035 && GET_CODE (XEXP (ind, 0)) == REG
7036 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7037 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
7044 /* Return TRUE if OP is a valid coprocessor memory address pattern.
7045 WB is true if full writeback address modes are allowed and is false
7046 if limited writeback address modes (POST_INC and PRE_DEC) are
7050 arm_coproc_mem_operand (rtx op, bool wb)
7054 /* Reject eliminable registers. */
7055 if (! (reload_in_progress || reload_completed)
7056 && ( reg_mentioned_p (frame_pointer_rtx, op)
7057 || reg_mentioned_p (arg_pointer_rtx, op)
7058 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7059 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7060 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7061 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7064 /* Constants are converted into offsets from labels. */
7065 if (GET_CODE (op) != MEM)
7070 if (reload_completed
7071 && (GET_CODE (ind) == LABEL_REF
7072 || (GET_CODE (ind) == CONST
7073 && GET_CODE (XEXP (ind, 0)) == PLUS
7074 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7075 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7078 /* Match: (mem (reg)). */
7079 if (GET_CODE (ind) == REG)
7080 return arm_address_register_rtx_p (ind, 0);
7082 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
7083 acceptable in any case (subject to verification by
7084 arm_address_register_rtx_p). We need WB to be true to accept
7085 PRE_INC and POST_DEC. */
7086 if (GET_CODE (ind) == POST_INC
7087 || GET_CODE (ind) == PRE_DEC
7089 && (GET_CODE (ind) == PRE_INC
7090 || GET_CODE (ind) == POST_DEC)))
7091 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7094 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
7095 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
7096 && GET_CODE (XEXP (ind, 1)) == PLUS
7097 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
7098 ind = XEXP (ind, 1);
7103 if (GET_CODE (ind) == PLUS
7104 && GET_CODE (XEXP (ind, 0)) == REG
7105 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7106 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7107 && INTVAL (XEXP (ind, 1)) > -1024
7108 && INTVAL (XEXP (ind, 1)) < 1024
7109 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7115 /* Return TRUE if OP is a memory operand which we can load or store a vector
7116 to/from. TYPE is one of the following values:
7117 0 - Vector load/stor (vldr)
7118 1 - Core registers (ldm)
7119 2 - Element/structure loads (vld1)
7122 neon_vector_mem_operand (rtx op, int type)
7126 /* Reject eliminable registers. */
7127 if (! (reload_in_progress || reload_completed)
7128 && ( reg_mentioned_p (frame_pointer_rtx, op)
7129 || reg_mentioned_p (arg_pointer_rtx, op)
7130 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7131 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7132 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7133 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7136 /* Constants are converted into offsets from labels. */
7137 if (GET_CODE (op) != MEM)
7142 if (reload_completed
7143 && (GET_CODE (ind) == LABEL_REF
7144 || (GET_CODE (ind) == CONST
7145 && GET_CODE (XEXP (ind, 0)) == PLUS
7146 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7147 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7150 /* Match: (mem (reg)). */
7151 if (GET_CODE (ind) == REG)
7152 return arm_address_register_rtx_p (ind, 0);
7154 /* Allow post-increment with Neon registers. */
7155 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
7156 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
7158 /* FIXME: vld1 allows register post-modify. */
7164 && GET_CODE (ind) == PLUS
7165 && GET_CODE (XEXP (ind, 0)) == REG
7166 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
7167 && GET_CODE (XEXP (ind, 1)) == CONST_INT
7168 && INTVAL (XEXP (ind, 1)) > -1024
7169 && INTVAL (XEXP (ind, 1)) < 1016
7170 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
7176 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
7179 neon_struct_mem_operand (rtx op)
7183 /* Reject eliminable registers. */
7184 if (! (reload_in_progress || reload_completed)
7185 && ( reg_mentioned_p (frame_pointer_rtx, op)
7186 || reg_mentioned_p (arg_pointer_rtx, op)
7187 || reg_mentioned_p (virtual_incoming_args_rtx, op)
7188 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
7189 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
7190 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
7193 /* Constants are converted into offsets from labels. */
7194 if (GET_CODE (op) != MEM)
7199 if (reload_completed
7200 && (GET_CODE (ind) == LABEL_REF
7201 || (GET_CODE (ind) == CONST
7202 && GET_CODE (XEXP (ind, 0)) == PLUS
7203 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
7204 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
7207 /* Match: (mem (reg)). */
7208 if (GET_CODE (ind) == REG)
7209 return arm_address_register_rtx_p (ind, 0);
7214 /* Return true if X is a register that will be eliminated later on. */
7216 arm_eliminable_register (rtx x)
7218 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
7219 || REGNO (x) == ARG_POINTER_REGNUM
7220 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
7221 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
7224 /* Return GENERAL_REGS if a scratch register required to reload x to/from
7225 coprocessor registers. Otherwise return NO_REGS. */
7228 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
7232 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
7234 return GENERAL_REGS;
7238 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7239 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7240 && neon_vector_mem_operand (x, 0))
7243 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
7246 return GENERAL_REGS;
7249 /* Values which must be returned in the most-significant end of the return
7253 arm_return_in_msb (const_tree valtype)
7255 return (TARGET_AAPCS_BASED
7257 && (AGGREGATE_TYPE_P (valtype)
7258 || TREE_CODE (valtype) == COMPLEX_TYPE));
7261 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
7262 Use by the Cirrus Maverick code which has to workaround
7263 a hardware bug triggered by such instructions. */
7265 arm_memory_load_p (rtx insn)
7267 rtx body, lhs, rhs;;
7269 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
7272 body = PATTERN (insn);
7274 if (GET_CODE (body) != SET)
7277 lhs = XEXP (body, 0);
7278 rhs = XEXP (body, 1);
7280 lhs = REG_OR_SUBREG_RTX (lhs);
7282 /* If the destination is not a general purpose
7283 register we do not have to worry. */
7284 if (GET_CODE (lhs) != REG
7285 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
7288 /* As well as loads from memory we also have to react
7289 to loads of invalid constants which will be turned
7290 into loads from the minipool. */
7291 return (GET_CODE (rhs) == MEM
7292 || GET_CODE (rhs) == SYMBOL_REF
7293 || note_invalid_constants (insn, -1, false));
7296 /* Return TRUE if INSN is a Cirrus instruction. */
7298 arm_cirrus_insn_p (rtx insn)
7300 enum attr_cirrus attr;
7302 /* get_attr cannot accept USE or CLOBBER. */
7304 || GET_CODE (insn) != INSN
7305 || GET_CODE (PATTERN (insn)) == USE
7306 || GET_CODE (PATTERN (insn)) == CLOBBER)
7309 attr = get_attr_cirrus (insn);
7311 return attr != CIRRUS_NOT;
7314 /* Cirrus reorg for invalid instruction combinations. */
7316 cirrus_reorg (rtx first)
7318 enum attr_cirrus attr;
7319 rtx body = PATTERN (first);
7323 /* Any branch must be followed by 2 non Cirrus instructions. */
7324 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
7327 t = next_nonnote_insn (first);
7329 if (arm_cirrus_insn_p (t))
7332 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7336 emit_insn_after (gen_nop (), first);
7341 /* (float (blah)) is in parallel with a clobber. */
7342 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
7343 body = XVECEXP (body, 0, 0);
7345 if (GET_CODE (body) == SET)
7347 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
7349 /* cfldrd, cfldr64, cfstrd, cfstr64 must
7350 be followed by a non Cirrus insn. */
7351 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
7353 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
7354 emit_insn_after (gen_nop (), first);
7358 else if (arm_memory_load_p (first))
7360 unsigned int arm_regno;
7362 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
7363 ldr/cfmv64hr combination where the Rd field is the same
7364 in both instructions must be split with a non Cirrus
7371 /* Get Arm register number for ldr insn. */
7372 if (GET_CODE (lhs) == REG)
7373 arm_regno = REGNO (lhs);
7376 gcc_assert (GET_CODE (rhs) == REG);
7377 arm_regno = REGNO (rhs);
7381 first = next_nonnote_insn (first);
7383 if (! arm_cirrus_insn_p (first))
7386 body = PATTERN (first);
7388 /* (float (blah)) is in parallel with a clobber. */
7389 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
7390 body = XVECEXP (body, 0, 0);
7392 if (GET_CODE (body) == FLOAT)
7393 body = XEXP (body, 0);
7395 if (get_attr_cirrus (first) == CIRRUS_MOVE
7396 && GET_CODE (XEXP (body, 1)) == REG
7397 && arm_regno == REGNO (XEXP (body, 1)))
7398 emit_insn_after (gen_nop (), first);
7404 /* get_attr cannot accept USE or CLOBBER. */
7406 || GET_CODE (first) != INSN
7407 || GET_CODE (PATTERN (first)) == USE
7408 || GET_CODE (PATTERN (first)) == CLOBBER)
7411 attr = get_attr_cirrus (first);
7413 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
7414 must be followed by a non-coprocessor instruction. */
7415 if (attr == CIRRUS_COMPARE)
7419 t = next_nonnote_insn (first);
7421 if (arm_cirrus_insn_p (t))
7424 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
7428 emit_insn_after (gen_nop (), first);
7434 /* Return TRUE if X references a SYMBOL_REF. */
7436 symbol_mentioned_p (rtx x)
7441 if (GET_CODE (x) == SYMBOL_REF)
7444 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
7445 are constant offsets, not symbols. */
7446 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7449 fmt = GET_RTX_FORMAT (GET_CODE (x));
7451 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7457 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7458 if (symbol_mentioned_p (XVECEXP (x, i, j)))
7461 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
7468 /* Return TRUE if X references a LABEL_REF. */
7470 label_mentioned_p (rtx x)
7475 if (GET_CODE (x) == LABEL_REF)
7478 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
7479 instruction, but they are constant offsets, not symbols. */
7480 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7483 fmt = GET_RTX_FORMAT (GET_CODE (x));
7484 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7490 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7491 if (label_mentioned_p (XVECEXP (x, i, j)))
7494 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
7502 tls_mentioned_p (rtx x)
7504 switch (GET_CODE (x))
7507 return tls_mentioned_p (XEXP (x, 0));
7510 if (XINT (x, 1) == UNSPEC_TLS)
7518 /* Must not copy a SET whose source operand is PC-relative. */
7521 arm_cannot_copy_insn_p (rtx insn)
7523 rtx pat = PATTERN (insn);
7525 if (GET_CODE (pat) == SET)
7527 rtx rhs = SET_SRC (pat);
7529 if (GET_CODE (rhs) == UNSPEC
7530 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
7533 if (GET_CODE (rhs) == MEM
7534 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
7535 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
7545 enum rtx_code code = GET_CODE (x);
7562 /* Return 1 if memory locations are adjacent. */
7564 adjacent_mem_locations (rtx a, rtx b)
7566 /* We don't guarantee to preserve the order of these memory refs. */
7567 if (volatile_refs_p (a) || volatile_refs_p (b))
7570 if ((GET_CODE (XEXP (a, 0)) == REG
7571 || (GET_CODE (XEXP (a, 0)) == PLUS
7572 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
7573 && (GET_CODE (XEXP (b, 0)) == REG
7574 || (GET_CODE (XEXP (b, 0)) == PLUS
7575 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
7577 HOST_WIDE_INT val0 = 0, val1 = 0;
7581 if (GET_CODE (XEXP (a, 0)) == PLUS)
7583 reg0 = XEXP (XEXP (a, 0), 0);
7584 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
7589 if (GET_CODE (XEXP (b, 0)) == PLUS)
7591 reg1 = XEXP (XEXP (b, 0), 0);
7592 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
7597 /* Don't accept any offset that will require multiple
7598 instructions to handle, since this would cause the
7599 arith_adjacentmem pattern to output an overlong sequence. */
7600 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
7603 /* Don't allow an eliminable register: register elimination can make
7604 the offset too large. */
7605 if (arm_eliminable_register (reg0))
7608 val_diff = val1 - val0;
7612 /* If the target has load delay slots, then there's no benefit
7613 to using an ldm instruction unless the offset is zero and
7614 we are optimizing for size. */
7615 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
7616 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
7617 && (val_diff == 4 || val_diff == -4));
7620 return ((REGNO (reg0) == REGNO (reg1))
7621 && (val_diff == 4 || val_diff == -4));
7628 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7629 HOST_WIDE_INT *load_offset)
7631 int unsorted_regs[4];
7632 HOST_WIDE_INT unsorted_offsets[4];
7637 /* Can only handle 2, 3, or 4 insns at present,
7638 though could be easily extended if required. */
7639 gcc_assert (nops >= 2 && nops <= 4);
7641 memset (order, 0, 4 * sizeof (int));
7643 /* Loop over the operands and check that the memory references are
7644 suitable (i.e. immediate offsets from the same base register). At
7645 the same time, extract the target register, and the memory
7647 for (i = 0; i < nops; i++)
7652 /* Convert a subreg of a mem into the mem itself. */
7653 if (GET_CODE (operands[nops + i]) == SUBREG)
7654 operands[nops + i] = alter_subreg (operands + (nops + i));
7656 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7658 /* Don't reorder volatile memory references; it doesn't seem worth
7659 looking for the case where the order is ok anyway. */
7660 if (MEM_VOLATILE_P (operands[nops + i]))
7663 offset = const0_rtx;
7665 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7666 || (GET_CODE (reg) == SUBREG
7667 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7668 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7669 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7671 || (GET_CODE (reg) == SUBREG
7672 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7673 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7678 base_reg = REGNO (reg);
7679 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7680 ? REGNO (operands[i])
7681 : REGNO (SUBREG_REG (operands[i])));
7686 if (base_reg != (int) REGNO (reg))
7687 /* Not addressed from the same base register. */
7690 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7691 ? REGNO (operands[i])
7692 : REGNO (SUBREG_REG (operands[i])));
7693 if (unsorted_regs[i] < unsorted_regs[order[0]])
7697 /* If it isn't an integer register, or if it overwrites the
7698 base register but isn't the last insn in the list, then
7699 we can't do this. */
7700 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
7701 || (i != nops - 1 && unsorted_regs[i] == base_reg))
7704 unsorted_offsets[i] = INTVAL (offset);
7707 /* Not a suitable memory address. */
7711 /* All the useful information has now been extracted from the
7712 operands into unsorted_regs and unsorted_offsets; additionally,
7713 order[0] has been set to the lowest numbered register in the
7714 list. Sort the registers into order, and check that the memory
7715 offsets are ascending and adjacent. */
7717 for (i = 1; i < nops; i++)
7721 order[i] = order[i - 1];
7722 for (j = 0; j < nops; j++)
7723 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7724 && (order[i] == order[i - 1]
7725 || unsorted_regs[j] < unsorted_regs[order[i]]))
7728 /* Have we found a suitable register? if not, one must be used more
7730 if (order[i] == order[i - 1])
7733 /* Is the memory address adjacent and ascending? */
7734 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7742 for (i = 0; i < nops; i++)
7743 regs[i] = unsorted_regs[order[i]];
7745 *load_offset = unsorted_offsets[order[0]];
7748 if (unsorted_offsets[order[0]] == 0)
7749 return 1; /* ldmia */
7751 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7752 return 2; /* ldmib */
7754 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7755 return 3; /* ldmda */
7757 if (unsorted_offsets[order[nops - 1]] == -4)
7758 return 4; /* ldmdb */
7760 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7761 if the offset isn't small enough. The reason 2 ldrs are faster
7762 is because these ARMs are able to do more than one cache access
7763 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7764 whilst the ARM8 has a double bandwidth cache. This means that
7765 these cores can do both an instruction fetch and a data fetch in
7766 a single cycle, so the trick of calculating the address into a
7767 scratch register (one of the result regs) and then doing a load
7768 multiple actually becomes slower (and no smaller in code size).
7769 That is the transformation
7771 ldr rd1, [rbase + offset]
7772 ldr rd2, [rbase + offset + 4]
7776 add rd1, rbase, offset
7777 ldmia rd1, {rd1, rd2}
7779 produces worse code -- '3 cycles + any stalls on rd2' instead of
7780 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7781 access per cycle, the first sequence could never complete in less
7782 than 6 cycles, whereas the ldm sequence would only take 5 and
7783 would make better use of sequential accesses if not hitting the
7786 We cheat here and test 'arm_ld_sched' which we currently know to
7787 only be true for the ARM8, ARM9 and StrongARM. If this ever
7788 changes, then the test below needs to be reworked. */
7789 if (nops == 2 && arm_ld_sched)
7792 /* Can't do it without setting up the offset, only do this if it takes
7793 no more than one insn. */
7794 return (const_ok_for_arm (unsorted_offsets[order[0]])
7795 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
7799 emit_ldm_seq (rtx *operands, int nops)
7803 HOST_WIDE_INT offset;
7807 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7810 strcpy (buf, "ldm%(ia%)\t");
7814 strcpy (buf, "ldm%(ib%)\t");
7818 strcpy (buf, "ldm%(da%)\t");
7822 strcpy (buf, "ldm%(db%)\t");
7827 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7828 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7831 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7832 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7834 output_asm_insn (buf, operands);
7836 strcpy (buf, "ldm%(ia%)\t");
7843 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7844 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7846 for (i = 1; i < nops; i++)
7847 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7848 reg_names[regs[i]]);
7850 strcat (buf, "}\t%@ phole ldm");
7852 output_asm_insn (buf, operands);
7857 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7858 HOST_WIDE_INT * load_offset)
7860 int unsorted_regs[4];
7861 HOST_WIDE_INT unsorted_offsets[4];
7866 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7867 extended if required. */
7868 gcc_assert (nops >= 2 && nops <= 4);
7870 memset (order, 0, 4 * sizeof (int));
7872 /* Loop over the operands and check that the memory references are
7873 suitable (i.e. immediate offsets from the same base register). At
7874 the same time, extract the target register, and the memory
7876 for (i = 0; i < nops; i++)
7881 /* Convert a subreg of a mem into the mem itself. */
7882 if (GET_CODE (operands[nops + i]) == SUBREG)
7883 operands[nops + i] = alter_subreg (operands + (nops + i));
7885 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7887 /* Don't reorder volatile memory references; it doesn't seem worth
7888 looking for the case where the order is ok anyway. */
7889 if (MEM_VOLATILE_P (operands[nops + i]))
7892 offset = const0_rtx;
7894 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7895 || (GET_CODE (reg) == SUBREG
7896 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7897 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7898 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7900 || (GET_CODE (reg) == SUBREG
7901 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7902 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7907 base_reg = REGNO (reg);
7908 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7909 ? REGNO (operands[i])
7910 : REGNO (SUBREG_REG (operands[i])));
7915 if (base_reg != (int) REGNO (reg))
7916 /* Not addressed from the same base register. */
7919 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7920 ? REGNO (operands[i])
7921 : REGNO (SUBREG_REG (operands[i])));
7922 if (unsorted_regs[i] < unsorted_regs[order[0]])
7926 /* If it isn't an integer register, then we can't do this. */
7927 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7930 unsorted_offsets[i] = INTVAL (offset);
7933 /* Not a suitable memory address. */
7937 /* All the useful information has now been extracted from the
7938 operands into unsorted_regs and unsorted_offsets; additionally,
7939 order[0] has been set to the lowest numbered register in the
7940 list. Sort the registers into order, and check that the memory
7941 offsets are ascending and adjacent. */
7943 for (i = 1; i < nops; i++)
7947 order[i] = order[i - 1];
7948 for (j = 0; j < nops; j++)
7949 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7950 && (order[i] == order[i - 1]
7951 || unsorted_regs[j] < unsorted_regs[order[i]]))
7954 /* Have we found a suitable register? if not, one must be used more
7956 if (order[i] == order[i - 1])
7959 /* Is the memory address adjacent and ascending? */
7960 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7968 for (i = 0; i < nops; i++)
7969 regs[i] = unsorted_regs[order[i]];
7971 *load_offset = unsorted_offsets[order[0]];
7974 if (unsorted_offsets[order[0]] == 0)
7975 return 1; /* stmia */
7977 if (unsorted_offsets[order[0]] == 4)
7978 return 2; /* stmib */
7980 if (unsorted_offsets[order[nops - 1]] == 0)
7981 return 3; /* stmda */
7983 if (unsorted_offsets[order[nops - 1]] == -4)
7984 return 4; /* stmdb */
7990 emit_stm_seq (rtx *operands, int nops)
7994 HOST_WIDE_INT offset;
7998 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
8001 strcpy (buf, "stm%(ia%)\t");
8005 strcpy (buf, "stm%(ib%)\t");
8009 strcpy (buf, "stm%(da%)\t");
8013 strcpy (buf, "stm%(db%)\t");
8020 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
8021 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
8023 for (i = 1; i < nops; i++)
8024 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
8025 reg_names[regs[i]]);
8027 strcat (buf, "}\t%@ phole stm");
8029 output_asm_insn (buf, operands);
8033 /* Routines for use in generating RTL. */
8036 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
8037 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
8039 HOST_WIDE_INT offset = *offsetp;
8042 int sign = up ? 1 : -1;
8045 /* XScale has load-store double instructions, but they have stricter
8046 alignment requirements than load-store multiple, so we cannot
8049 For XScale ldm requires 2 + NREGS cycles to complete and blocks
8050 the pipeline until completion.
8058 An ldr instruction takes 1-3 cycles, but does not block the
8067 Best case ldr will always win. However, the more ldr instructions
8068 we issue, the less likely we are to be able to schedule them well.
8069 Using ldr instructions also increases code size.
8071 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
8072 for counts of 3 or 4 regs. */
8073 if (arm_tune_xscale && count <= 2 && ! optimize_size)
8079 for (i = 0; i < count; i++)
8081 addr = plus_constant (from, i * 4 * sign);
8082 mem = adjust_automodify_address (basemem, SImode, addr, offset);
8083 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
8089 emit_move_insn (from, plus_constant (from, count * 4 * sign));
8099 result = gen_rtx_PARALLEL (VOIDmode,
8100 rtvec_alloc (count + (write_back ? 1 : 0)));
8103 XVECEXP (result, 0, 0)
8104 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
8109 for (j = 0; i < count; i++, j++)
8111 addr = plus_constant (from, j * 4 * sign);
8112 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8113 XVECEXP (result, 0, i)
8114 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
8125 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
8126 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
8128 HOST_WIDE_INT offset = *offsetp;
8131 int sign = up ? 1 : -1;
8134 /* See arm_gen_load_multiple for discussion of
8135 the pros/cons of ldm/stm usage for XScale. */
8136 if (arm_tune_xscale && count <= 2 && ! optimize_size)
8142 for (i = 0; i < count; i++)
8144 addr = plus_constant (to, i * 4 * sign);
8145 mem = adjust_automodify_address (basemem, SImode, addr, offset);
8146 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
8152 emit_move_insn (to, plus_constant (to, count * 4 * sign));
8162 result = gen_rtx_PARALLEL (VOIDmode,
8163 rtvec_alloc (count + (write_back ? 1 : 0)));
8166 XVECEXP (result, 0, 0)
8167 = gen_rtx_SET (VOIDmode, to,
8168 plus_constant (to, count * 4 * sign));
8173 for (j = 0; i < count; i++, j++)
8175 addr = plus_constant (to, j * 4 * sign);
8176 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
8177 XVECEXP (result, 0, i)
8178 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
8189 arm_gen_movmemqi (rtx *operands)
8191 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
8192 HOST_WIDE_INT srcoffset, dstoffset;
8194 rtx src, dst, srcbase, dstbase;
8195 rtx part_bytes_reg = NULL;
8198 if (GET_CODE (operands[2]) != CONST_INT
8199 || GET_CODE (operands[3]) != CONST_INT
8200 || INTVAL (operands[2]) > 64
8201 || INTVAL (operands[3]) & 3)
8204 dstbase = operands[0];
8205 srcbase = operands[1];
8207 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
8208 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
8210 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
8211 out_words_to_go = INTVAL (operands[2]) / 4;
8212 last_bytes = INTVAL (operands[2]) & 3;
8213 dstoffset = srcoffset = 0;
8215 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
8216 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
8218 for (i = 0; in_words_to_go >= 2; i+=4)
8220 if (in_words_to_go > 4)
8221 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
8222 srcbase, &srcoffset));
8224 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
8225 FALSE, srcbase, &srcoffset));
8227 if (out_words_to_go)
8229 if (out_words_to_go > 4)
8230 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
8231 dstbase, &dstoffset));
8232 else if (out_words_to_go != 1)
8233 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
8237 dstbase, &dstoffset));
8240 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8241 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
8242 if (last_bytes != 0)
8244 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
8250 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
8251 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
8254 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
8255 if (out_words_to_go)
8259 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8260 sreg = copy_to_reg (mem);
8262 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
8263 emit_move_insn (mem, sreg);
8266 gcc_assert (!in_words_to_go); /* Sanity check */
8271 gcc_assert (in_words_to_go > 0);
8273 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
8274 part_bytes_reg = copy_to_mode_reg (SImode, mem);
8277 gcc_assert (!last_bytes || part_bytes_reg);
8279 if (BYTES_BIG_ENDIAN && last_bytes)
8281 rtx tmp = gen_reg_rtx (SImode);
8283 /* The bytes we want are in the top end of the word. */
8284 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
8285 GEN_INT (8 * (4 - last_bytes))));
8286 part_bytes_reg = tmp;
8290 mem = adjust_automodify_address (dstbase, QImode,
8291 plus_constant (dst, last_bytes - 1),
8292 dstoffset + last_bytes - 1);
8293 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8297 tmp = gen_reg_rtx (SImode);
8298 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
8299 part_bytes_reg = tmp;
8308 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
8309 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
8313 rtx tmp = gen_reg_rtx (SImode);
8314 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
8315 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
8316 part_bytes_reg = tmp;
8323 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
8324 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
8331 /* Select a dominance comparison mode if possible for a test of the general
8332 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
8333 COND_OR == DOM_CC_X_AND_Y => (X && Y)
8334 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
8335 COND_OR == DOM_CC_X_OR_Y => (X || Y)
8336 In all cases OP will be either EQ or NE, but we don't need to know which
8337 here. If we are unable to support a dominance comparison we return
8338 CC mode. This will then fail to match for the RTL expressions that
8339 generate this call. */
8341 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
8343 enum rtx_code cond1, cond2;
8346 /* Currently we will probably get the wrong result if the individual
8347 comparisons are not simple. This also ensures that it is safe to
8348 reverse a comparison if necessary. */
8349 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
8351 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
8355 /* The if_then_else variant of this tests the second condition if the
8356 first passes, but is true if the first fails. Reverse the first
8357 condition to get a true "inclusive-or" expression. */
8358 if (cond_or == DOM_CC_NX_OR_Y)
8359 cond1 = reverse_condition (cond1);
8361 /* If the comparisons are not equal, and one doesn't dominate the other,
8362 then we can't do this. */
8364 && !comparison_dominates_p (cond1, cond2)
8365 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
8370 enum rtx_code temp = cond1;
8378 if (cond_or == DOM_CC_X_AND_Y)
8383 case EQ: return CC_DEQmode;
8384 case LE: return CC_DLEmode;
8385 case LEU: return CC_DLEUmode;
8386 case GE: return CC_DGEmode;
8387 case GEU: return CC_DGEUmode;
8388 default: gcc_unreachable ();
8392 if (cond_or == DOM_CC_X_AND_Y)
8408 if (cond_or == DOM_CC_X_AND_Y)
8424 if (cond_or == DOM_CC_X_AND_Y)
8440 if (cond_or == DOM_CC_X_AND_Y)
8455 /* The remaining cases only occur when both comparisons are the
8458 gcc_assert (cond1 == cond2);
8462 gcc_assert (cond1 == cond2);
8466 gcc_assert (cond1 == cond2);
8470 gcc_assert (cond1 == cond2);
8474 gcc_assert (cond1 == cond2);
8483 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
8485 /* All floating point compares return CCFP if it is an equality
8486 comparison, and CCFPE otherwise. */
8487 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
8507 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
8516 /* A compare with a shifted operand. Because of canonicalization, the
8517 comparison will have to be swapped when we emit the assembler. */
8518 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
8519 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8520 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
8521 || GET_CODE (x) == ROTATERT))
8524 /* This operation is performed swapped, but since we only rely on the Z
8525 flag we don't need an additional mode. */
8526 if (GET_MODE (y) == SImode && REG_P (y)
8527 && GET_CODE (x) == NEG
8528 && (op == EQ || op == NE))
8531 /* This is a special case that is used by combine to allow a
8532 comparison of a shifted byte load to be split into a zero-extend
8533 followed by a comparison of the shifted integer (only valid for
8534 equalities and unsigned inequalities). */
8535 if (GET_MODE (x) == SImode
8536 && GET_CODE (x) == ASHIFT
8537 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
8538 && GET_CODE (XEXP (x, 0)) == SUBREG
8539 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
8540 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
8541 && (op == EQ || op == NE
8542 || op == GEU || op == GTU || op == LTU || op == LEU)
8543 && GET_CODE (y) == CONST_INT)
8546 /* A construct for a conditional compare, if the false arm contains
8547 0, then both conditions must be true, otherwise either condition
8548 must be true. Not all conditions are possible, so CCmode is
8549 returned if it can't be done. */
8550 if (GET_CODE (x) == IF_THEN_ELSE
8551 && (XEXP (x, 2) == const0_rtx
8552 || XEXP (x, 2) == const1_rtx)
8553 && COMPARISON_P (XEXP (x, 0))
8554 && COMPARISON_P (XEXP (x, 1)))
8555 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8556 INTVAL (XEXP (x, 2)));
8558 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
8559 if (GET_CODE (x) == AND
8560 && COMPARISON_P (XEXP (x, 0))
8561 && COMPARISON_P (XEXP (x, 1)))
8562 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8565 if (GET_CODE (x) == IOR
8566 && COMPARISON_P (XEXP (x, 0))
8567 && COMPARISON_P (XEXP (x, 1)))
8568 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
8571 /* An operation (on Thumb) where we want to test for a single bit.
8572 This is done by shifting that bit up into the top bit of a
8573 scratch register; we can then branch on the sign bit. */
8575 && GET_MODE (x) == SImode
8576 && (op == EQ || op == NE)
8577 && GET_CODE (x) == ZERO_EXTRACT
8578 && XEXP (x, 1) == const1_rtx)
8581 /* An operation that sets the condition codes as a side-effect, the
8582 V flag is not set correctly, so we can only use comparisons where
8583 this doesn't matter. (For LT and GE we can use "mi" and "pl"
8585 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
8586 if (GET_MODE (x) == SImode
8588 && (op == EQ || op == NE || op == LT || op == GE)
8589 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
8590 || GET_CODE (x) == AND || GET_CODE (x) == IOR
8591 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
8592 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
8593 || GET_CODE (x) == LSHIFTRT
8594 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
8595 || GET_CODE (x) == ROTATERT
8596 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
8599 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
8602 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
8603 && GET_CODE (x) == PLUS
8604 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
8610 /* X and Y are two things to compare using CODE. Emit the compare insn and
8611 return the rtx for register 0 in the proper mode. FP means this is a
8612 floating point compare: I don't think that it is needed on the arm. */
8614 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
8616 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
8617 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
8619 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
8624 /* Generate a sequence of insns that will generate the correct return
8625 address mask depending on the physical architecture that the program
8628 arm_gen_return_addr_mask (void)
8630 rtx reg = gen_reg_rtx (Pmode);
8632 emit_insn (gen_return_addr_mask (reg));
8637 arm_reload_in_hi (rtx *operands)
8639 rtx ref = operands[1];
8641 HOST_WIDE_INT offset = 0;
8643 if (GET_CODE (ref) == SUBREG)
8645 offset = SUBREG_BYTE (ref);
8646 ref = SUBREG_REG (ref);
8649 if (GET_CODE (ref) == REG)
8651 /* We have a pseudo which has been spilt onto the stack; there
8652 are two cases here: the first where there is a simple
8653 stack-slot replacement and a second where the stack-slot is
8654 out of range, or is used as a subreg. */
8655 if (reg_equiv_mem[REGNO (ref)])
8657 ref = reg_equiv_mem[REGNO (ref)];
8658 base = find_replacement (&XEXP (ref, 0));
8661 /* The slot is out of range, or was dressed up in a SUBREG. */
8662 base = reg_equiv_address[REGNO (ref)];
8665 base = find_replacement (&XEXP (ref, 0));
8667 /* Handle the case where the address is too complex to be offset by 1. */
8668 if (GET_CODE (base) == MINUS
8669 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8671 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8673 emit_set_insn (base_plus, base);
8676 else if (GET_CODE (base) == PLUS)
8678 /* The addend must be CONST_INT, or we would have dealt with it above. */
8679 HOST_WIDE_INT hi, lo;
8681 offset += INTVAL (XEXP (base, 1));
8682 base = XEXP (base, 0);
8684 /* Rework the address into a legal sequence of insns. */
8685 /* Valid range for lo is -4095 -> 4095 */
8688 : -((-offset) & 0xfff));
8690 /* Corner case, if lo is the max offset then we would be out of range
8691 once we have added the additional 1 below, so bump the msb into the
8692 pre-loading insn(s). */
8696 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8697 ^ (HOST_WIDE_INT) 0x80000000)
8698 - (HOST_WIDE_INT) 0x80000000);
8700 gcc_assert (hi + lo == offset);
8704 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8706 /* Get the base address; addsi3 knows how to handle constants
8707 that require more than one insn. */
8708 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8714 /* Operands[2] may overlap operands[0] (though it won't overlap
8715 operands[1]), that's why we asked for a DImode reg -- so we can
8716 use the bit that does not overlap. */
8717 if (REGNO (operands[2]) == REGNO (operands[0]))
8718 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8720 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8722 emit_insn (gen_zero_extendqisi2 (scratch,
8723 gen_rtx_MEM (QImode,
8724 plus_constant (base,
8726 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
8727 gen_rtx_MEM (QImode,
8728 plus_constant (base,
8730 if (!BYTES_BIG_ENDIAN)
8731 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8732 gen_rtx_IOR (SImode,
8735 gen_rtx_SUBREG (SImode, operands[0], 0),
8739 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8740 gen_rtx_IOR (SImode,
8741 gen_rtx_ASHIFT (SImode, scratch,
8743 gen_rtx_SUBREG (SImode, operands[0], 0)));
8746 /* Handle storing a half-word to memory during reload by synthesizing as two
8747 byte stores. Take care not to clobber the input values until after we
8748 have moved them somewhere safe. This code assumes that if the DImode
8749 scratch in operands[2] overlaps either the input value or output address
8750 in some way, then that value must die in this insn (we absolutely need
8751 two scratch registers for some corner cases). */
8753 arm_reload_out_hi (rtx *operands)
8755 rtx ref = operands[0];
8756 rtx outval = operands[1];
8758 HOST_WIDE_INT offset = 0;
8760 if (GET_CODE (ref) == SUBREG)
8762 offset = SUBREG_BYTE (ref);
8763 ref = SUBREG_REG (ref);
8766 if (GET_CODE (ref) == REG)
8768 /* We have a pseudo which has been spilt onto the stack; there
8769 are two cases here: the first where there is a simple
8770 stack-slot replacement and a second where the stack-slot is
8771 out of range, or is used as a subreg. */
8772 if (reg_equiv_mem[REGNO (ref)])
8774 ref = reg_equiv_mem[REGNO (ref)];
8775 base = find_replacement (&XEXP (ref, 0));
8778 /* The slot is out of range, or was dressed up in a SUBREG. */
8779 base = reg_equiv_address[REGNO (ref)];
8782 base = find_replacement (&XEXP (ref, 0));
8784 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8786 /* Handle the case where the address is too complex to be offset by 1. */
8787 if (GET_CODE (base) == MINUS
8788 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8790 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8792 /* Be careful not to destroy OUTVAL. */
8793 if (reg_overlap_mentioned_p (base_plus, outval))
8795 /* Updating base_plus might destroy outval, see if we can
8796 swap the scratch and base_plus. */
8797 if (!reg_overlap_mentioned_p (scratch, outval))
8800 scratch = base_plus;
8805 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8807 /* Be conservative and copy OUTVAL into the scratch now,
8808 this should only be necessary if outval is a subreg
8809 of something larger than a word. */
8810 /* XXX Might this clobber base? I can't see how it can,
8811 since scratch is known to overlap with OUTVAL, and
8812 must be wider than a word. */
8813 emit_insn (gen_movhi (scratch_hi, outval));
8814 outval = scratch_hi;
8818 emit_set_insn (base_plus, base);
8821 else if (GET_CODE (base) == PLUS)
8823 /* The addend must be CONST_INT, or we would have dealt with it above. */
8824 HOST_WIDE_INT hi, lo;
8826 offset += INTVAL (XEXP (base, 1));
8827 base = XEXP (base, 0);
8829 /* Rework the address into a legal sequence of insns. */
8830 /* Valid range for lo is -4095 -> 4095 */
8833 : -((-offset) & 0xfff));
8835 /* Corner case, if lo is the max offset then we would be out of range
8836 once we have added the additional 1 below, so bump the msb into the
8837 pre-loading insn(s). */
8841 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8842 ^ (HOST_WIDE_INT) 0x80000000)
8843 - (HOST_WIDE_INT) 0x80000000);
8845 gcc_assert (hi + lo == offset);
8849 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8851 /* Be careful not to destroy OUTVAL. */
8852 if (reg_overlap_mentioned_p (base_plus, outval))
8854 /* Updating base_plus might destroy outval, see if we
8855 can swap the scratch and base_plus. */
8856 if (!reg_overlap_mentioned_p (scratch, outval))
8859 scratch = base_plus;
8864 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8866 /* Be conservative and copy outval into scratch now,
8867 this should only be necessary if outval is a
8868 subreg of something larger than a word. */
8869 /* XXX Might this clobber base? I can't see how it
8870 can, since scratch is known to overlap with
8872 emit_insn (gen_movhi (scratch_hi, outval));
8873 outval = scratch_hi;
8877 /* Get the base address; addsi3 knows how to handle constants
8878 that require more than one insn. */
8879 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8885 if (BYTES_BIG_ENDIAN)
8887 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8888 plus_constant (base, offset + 1)),
8889 gen_lowpart (QImode, outval)));
8890 emit_insn (gen_lshrsi3 (scratch,
8891 gen_rtx_SUBREG (SImode, outval, 0),
8893 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8894 gen_lowpart (QImode, scratch)));
8898 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8899 gen_lowpart (QImode, outval)));
8900 emit_insn (gen_lshrsi3 (scratch,
8901 gen_rtx_SUBREG (SImode, outval, 0),
8903 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8904 plus_constant (base, offset + 1)),
8905 gen_lowpart (QImode, scratch)));
8909 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8910 (padded to the size of a word) should be passed in a register. */
8913 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8915 if (TARGET_AAPCS_BASED)
8916 return must_pass_in_stack_var_size (mode, type);
8918 return must_pass_in_stack_var_size_or_pad (mode, type);
8922 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8923 Return true if an argument passed on the stack should be padded upwards,
8924 i.e. if the least-significant byte has useful data.
8925 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8926 aggregate types are placed in the lowest memory address. */
8929 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8931 if (!TARGET_AAPCS_BASED)
8932 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8934 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8941 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8942 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8943 byte of the register has useful data, and return the opposite if the
8944 most significant byte does.
8945 For AAPCS, small aggregates and small complex types are always padded
8949 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8950 tree type, int first ATTRIBUTE_UNUSED)
8952 if (TARGET_AAPCS_BASED
8954 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8955 && int_size_in_bytes (type) <= 4)
8958 /* Otherwise, use default padding. */
8959 return !BYTES_BIG_ENDIAN;
8963 /* Print a symbolic form of X to the debug file, F. */
8965 arm_print_value (FILE *f, rtx x)
8967 switch (GET_CODE (x))
8970 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8974 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8982 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8984 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8985 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8993 fprintf (f, "\"%s\"", XSTR (x, 0));
8997 fprintf (f, "`%s'", XSTR (x, 0));
9001 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
9005 arm_print_value (f, XEXP (x, 0));
9009 arm_print_value (f, XEXP (x, 0));
9011 arm_print_value (f, XEXP (x, 1));
9019 fprintf (f, "????");
9024 /* Routines for manipulation of the constant pool. */
9026 /* Arm instructions cannot load a large constant directly into a
9027 register; they have to come from a pc relative load. The constant
9028 must therefore be placed in the addressable range of the pc
9029 relative load. Depending on the precise pc relative load
9030 instruction the range is somewhere between 256 bytes and 4k. This
9031 means that we often have to dump a constant inside a function, and
9032 generate code to branch around it.
9034 It is important to minimize this, since the branches will slow
9035 things down and make the code larger.
9037 Normally we can hide the table after an existing unconditional
9038 branch so that there is no interruption of the flow, but in the
9039 worst case the code looks like this:
9057 We fix this by performing a scan after scheduling, which notices
9058 which instructions need to have their operands fetched from the
9059 constant table and builds the table.
9061 The algorithm starts by building a table of all the constants that
9062 need fixing up and all the natural barriers in the function (places
9063 where a constant table can be dropped without breaking the flow).
9064 For each fixup we note how far the pc-relative replacement will be
9065 able to reach and the offset of the instruction into the function.
9067 Having built the table we then group the fixes together to form
9068 tables that are as large as possible (subject to addressing
9069 constraints) and emit each table of constants after the last
9070 barrier that is within range of all the instructions in the group.
9071 If a group does not contain a barrier, then we forcibly create one
9072 by inserting a jump instruction into the flow. Once the table has
9073 been inserted, the insns are then modified to reference the
9074 relevant entry in the pool.
9076 Possible enhancements to the algorithm (not implemented) are:
9078 1) For some processors and object formats, there may be benefit in
9079 aligning the pools to the start of cache lines; this alignment
9080 would need to be taken into account when calculating addressability
9083 /* These typedefs are located at the start of this file, so that
9084 they can be used in the prototypes there. This comment is to
9085 remind readers of that fact so that the following structures
9086 can be understood more easily.
9088 typedef struct minipool_node Mnode;
9089 typedef struct minipool_fixup Mfix; */
9091 struct minipool_node
9093 /* Doubly linked chain of entries. */
9096 /* The maximum offset into the code that this entry can be placed. While
9097 pushing fixes for forward references, all entries are sorted in order
9098 of increasing max_address. */
9099 HOST_WIDE_INT max_address;
9100 /* Similarly for an entry inserted for a backwards ref. */
9101 HOST_WIDE_INT min_address;
9102 /* The number of fixes referencing this entry. This can become zero
9103 if we "unpush" an entry. In this case we ignore the entry when we
9104 come to emit the code. */
9106 /* The offset from the start of the minipool. */
9107 HOST_WIDE_INT offset;
9108 /* The value in table. */
9110 /* The mode of value. */
9111 enum machine_mode mode;
9112 /* The size of the value. With iWMMXt enabled
9113 sizes > 4 also imply an alignment of 8-bytes. */
9117 struct minipool_fixup
9121 HOST_WIDE_INT address;
9123 enum machine_mode mode;
9127 HOST_WIDE_INT forwards;
9128 HOST_WIDE_INT backwards;
9131 /* Fixes less than a word need padding out to a word boundary. */
9132 #define MINIPOOL_FIX_SIZE(mode) \
9133 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
9135 static Mnode * minipool_vector_head;
9136 static Mnode * minipool_vector_tail;
9137 static rtx minipool_vector_label;
9138 static int minipool_pad;
9140 /* The linked list of all minipool fixes required for this function. */
9141 Mfix * minipool_fix_head;
9142 Mfix * minipool_fix_tail;
9143 /* The fix entry for the current minipool, once it has been placed. */
9144 Mfix * minipool_barrier;
9146 /* Determines if INSN is the start of a jump table. Returns the end
9147 of the TABLE or NULL_RTX. */
9149 is_jump_table (rtx insn)
9153 if (GET_CODE (insn) == JUMP_INSN
9154 && JUMP_LABEL (insn) != NULL
9155 && ((table = next_real_insn (JUMP_LABEL (insn)))
9156 == next_real_insn (insn))
9158 && GET_CODE (table) == JUMP_INSN
9159 && (GET_CODE (PATTERN (table)) == ADDR_VEC
9160 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
9166 #ifndef JUMP_TABLES_IN_TEXT_SECTION
9167 #define JUMP_TABLES_IN_TEXT_SECTION 0
9170 static HOST_WIDE_INT
9171 get_jump_table_size (rtx insn)
9173 /* ADDR_VECs only take room if read-only data does into the text
9175 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
9177 rtx body = PATTERN (insn);
9178 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
9180 HOST_WIDE_INT modesize;
9182 modesize = GET_MODE_SIZE (GET_MODE (body));
9183 size = modesize * XVECLEN (body, elt);
9187 /* Round up size of TBB table to a halfword boundary. */
9188 size = (size + 1) & ~(HOST_WIDE_INT)1;
9191 /* No padding necessary for TBH. */
9194 /* Add two bytes for alignment on Thumb. */
9207 /* Move a minipool fix MP from its current location to before MAX_MP.
9208 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
9209 constraints may need updating. */
9211 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
9212 HOST_WIDE_INT max_address)
9214 /* The code below assumes these are different. */
9215 gcc_assert (mp != max_mp);
9219 if (max_address < mp->max_address)
9220 mp->max_address = max_address;
9224 if (max_address > max_mp->max_address - mp->fix_size)
9225 mp->max_address = max_mp->max_address - mp->fix_size;
9227 mp->max_address = max_address;
9229 /* Unlink MP from its current position. Since max_mp is non-null,
9230 mp->prev must be non-null. */
9231 mp->prev->next = mp->next;
9232 if (mp->next != NULL)
9233 mp->next->prev = mp->prev;
9235 minipool_vector_tail = mp->prev;
9237 /* Re-insert it before MAX_MP. */
9239 mp->prev = max_mp->prev;
9242 if (mp->prev != NULL)
9243 mp->prev->next = mp;
9245 minipool_vector_head = mp;
9248 /* Save the new entry. */
9251 /* Scan over the preceding entries and adjust their addresses as
9253 while (mp->prev != NULL
9254 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9256 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9263 /* Add a constant to the minipool for a forward reference. Returns the
9264 node added or NULL if the constant will not fit in this pool. */
9266 add_minipool_forward_ref (Mfix *fix)
9268 /* If set, max_mp is the first pool_entry that has a lower
9269 constraint than the one we are trying to add. */
9270 Mnode * max_mp = NULL;
9271 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
9274 /* If the minipool starts before the end of FIX->INSN then this FIX
9275 can not be placed into the current pool. Furthermore, adding the
9276 new constant pool entry may cause the pool to start FIX_SIZE bytes
9278 if (minipool_vector_head &&
9279 (fix->address + get_attr_length (fix->insn)
9280 >= minipool_vector_head->max_address - fix->fix_size))
9283 /* Scan the pool to see if a constant with the same value has
9284 already been added. While we are doing this, also note the
9285 location where we must insert the constant if it doesn't already
9287 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9289 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9290 && fix->mode == mp->mode
9291 && (GET_CODE (fix->value) != CODE_LABEL
9292 || (CODE_LABEL_NUMBER (fix->value)
9293 == CODE_LABEL_NUMBER (mp->value)))
9294 && rtx_equal_p (fix->value, mp->value))
9296 /* More than one fix references this entry. */
9298 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
9301 /* Note the insertion point if necessary. */
9303 && mp->max_address > max_address)
9306 /* If we are inserting an 8-bytes aligned quantity and
9307 we have not already found an insertion point, then
9308 make sure that all such 8-byte aligned quantities are
9309 placed at the start of the pool. */
9310 if (ARM_DOUBLEWORD_ALIGN
9312 && fix->fix_size >= 8
9313 && mp->fix_size < 8)
9316 max_address = mp->max_address;
9320 /* The value is not currently in the minipool, so we need to create
9321 a new entry for it. If MAX_MP is NULL, the entry will be put on
9322 the end of the list since the placement is less constrained than
9323 any existing entry. Otherwise, we insert the new fix before
9324 MAX_MP and, if necessary, adjust the constraints on the other
9327 mp->fix_size = fix->fix_size;
9328 mp->mode = fix->mode;
9329 mp->value = fix->value;
9331 /* Not yet required for a backwards ref. */
9332 mp->min_address = -65536;
9336 mp->max_address = max_address;
9338 mp->prev = minipool_vector_tail;
9340 if (mp->prev == NULL)
9342 minipool_vector_head = mp;
9343 minipool_vector_label = gen_label_rtx ();
9346 mp->prev->next = mp;
9348 minipool_vector_tail = mp;
9352 if (max_address > max_mp->max_address - mp->fix_size)
9353 mp->max_address = max_mp->max_address - mp->fix_size;
9355 mp->max_address = max_address;
9358 mp->prev = max_mp->prev;
9360 if (mp->prev != NULL)
9361 mp->prev->next = mp;
9363 minipool_vector_head = mp;
9366 /* Save the new entry. */
9369 /* Scan over the preceding entries and adjust their addresses as
9371 while (mp->prev != NULL
9372 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
9374 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
9382 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
9383 HOST_WIDE_INT min_address)
9385 HOST_WIDE_INT offset;
9387 /* The code below assumes these are different. */
9388 gcc_assert (mp != min_mp);
9392 if (min_address > mp->min_address)
9393 mp->min_address = min_address;
9397 /* We will adjust this below if it is too loose. */
9398 mp->min_address = min_address;
9400 /* Unlink MP from its current position. Since min_mp is non-null,
9401 mp->next must be non-null. */
9402 mp->next->prev = mp->prev;
9403 if (mp->prev != NULL)
9404 mp->prev->next = mp->next;
9406 minipool_vector_head = mp->next;
9408 /* Reinsert it after MIN_MP. */
9410 mp->next = min_mp->next;
9412 if (mp->next != NULL)
9413 mp->next->prev = mp;
9415 minipool_vector_tail = mp;
9421 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9423 mp->offset = offset;
9424 if (mp->refcount > 0)
9425 offset += mp->fix_size;
9427 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
9428 mp->next->min_address = mp->min_address + mp->fix_size;
9434 /* Add a constant to the minipool for a backward reference. Returns the
9435 node added or NULL if the constant will not fit in this pool.
9437 Note that the code for insertion for a backwards reference can be
9438 somewhat confusing because the calculated offsets for each fix do
9439 not take into account the size of the pool (which is still under
9442 add_minipool_backward_ref (Mfix *fix)
9444 /* If set, min_mp is the last pool_entry that has a lower constraint
9445 than the one we are trying to add. */
9446 Mnode *min_mp = NULL;
9447 /* This can be negative, since it is only a constraint. */
9448 HOST_WIDE_INT min_address = fix->address - fix->backwards;
9451 /* If we can't reach the current pool from this insn, or if we can't
9452 insert this entry at the end of the pool without pushing other
9453 fixes out of range, then we don't try. This ensures that we
9454 can't fail later on. */
9455 if (min_address >= minipool_barrier->address
9456 || (minipool_vector_tail->min_address + fix->fix_size
9457 >= minipool_barrier->address))
9460 /* Scan the pool to see if a constant with the same value has
9461 already been added. While we are doing this, also note the
9462 location where we must insert the constant if it doesn't already
9464 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
9466 if (GET_CODE (fix->value) == GET_CODE (mp->value)
9467 && fix->mode == mp->mode
9468 && (GET_CODE (fix->value) != CODE_LABEL
9469 || (CODE_LABEL_NUMBER (fix->value)
9470 == CODE_LABEL_NUMBER (mp->value)))
9471 && rtx_equal_p (fix->value, mp->value)
9472 /* Check that there is enough slack to move this entry to the
9473 end of the table (this is conservative). */
9475 > (minipool_barrier->address
9476 + minipool_vector_tail->offset
9477 + minipool_vector_tail->fix_size)))
9480 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
9484 mp->min_address += fix->fix_size;
9487 /* Note the insertion point if necessary. */
9488 if (mp->min_address < min_address)
9490 /* For now, we do not allow the insertion of 8-byte alignment
9491 requiring nodes anywhere but at the start of the pool. */
9492 if (ARM_DOUBLEWORD_ALIGN
9493 && fix->fix_size >= 8 && mp->fix_size < 8)
9498 else if (mp->max_address
9499 < minipool_barrier->address + mp->offset + fix->fix_size)
9501 /* Inserting before this entry would push the fix beyond
9502 its maximum address (which can happen if we have
9503 re-located a forwards fix); force the new fix to come
9505 if (ARM_DOUBLEWORD_ALIGN
9506 && fix->fix_size >= 8 && mp->fix_size < 8)
9511 min_address = mp->min_address + fix->fix_size;
9514 /* Do not insert a non-8-byte aligned quantity before 8-byte
9515 aligned quantities. */
9516 else if (ARM_DOUBLEWORD_ALIGN
9517 && fix->fix_size < 8
9518 && mp->fix_size >= 8)
9521 min_address = mp->min_address + fix->fix_size;
9526 /* We need to create a new entry. */
9528 mp->fix_size = fix->fix_size;
9529 mp->mode = fix->mode;
9530 mp->value = fix->value;
9532 mp->max_address = minipool_barrier->address + 65536;
9534 mp->min_address = min_address;
9539 mp->next = minipool_vector_head;
9541 if (mp->next == NULL)
9543 minipool_vector_tail = mp;
9544 minipool_vector_label = gen_label_rtx ();
9547 mp->next->prev = mp;
9549 minipool_vector_head = mp;
9553 mp->next = min_mp->next;
9557 if (mp->next != NULL)
9558 mp->next->prev = mp;
9560 minipool_vector_tail = mp;
9563 /* Save the new entry. */
9571 /* Scan over the following entries and adjust their offsets. */
9572 while (mp->next != NULL)
9574 if (mp->next->min_address < mp->min_address + mp->fix_size)
9575 mp->next->min_address = mp->min_address + mp->fix_size;
9578 mp->next->offset = mp->offset + mp->fix_size;
9580 mp->next->offset = mp->offset;
9589 assign_minipool_offsets (Mfix *barrier)
9591 HOST_WIDE_INT offset = 0;
9594 minipool_barrier = barrier;
9596 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9598 mp->offset = offset;
9600 if (mp->refcount > 0)
9601 offset += mp->fix_size;
9605 /* Output the literal table */
9607 dump_minipool (rtx scan)
9613 if (ARM_DOUBLEWORD_ALIGN)
9614 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
9615 if (mp->refcount > 0 && mp->fix_size >= 8)
9623 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
9624 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
9626 scan = emit_label_after (gen_label_rtx (), scan);
9627 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
9628 scan = emit_label_after (minipool_vector_label, scan);
9630 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
9632 if (mp->refcount > 0)
9637 ";; Offset %u, min %ld, max %ld ",
9638 (unsigned) mp->offset, (unsigned long) mp->min_address,
9639 (unsigned long) mp->max_address);
9640 arm_print_value (dump_file, mp->value);
9641 fputc ('\n', dump_file);
9644 switch (mp->fix_size)
9646 #ifdef HAVE_consttable_1
9648 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
9652 #ifdef HAVE_consttable_2
9654 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
9658 #ifdef HAVE_consttable_4
9660 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
9664 #ifdef HAVE_consttable_8
9666 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
9670 #ifdef HAVE_consttable_16
9672 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
9685 minipool_vector_head = minipool_vector_tail = NULL;
9686 scan = emit_insn_after (gen_consttable_end (), scan);
9687 scan = emit_barrier_after (scan);
9690 /* Return the cost of forcibly inserting a barrier after INSN. */
9692 arm_barrier_cost (rtx insn)
9694 /* Basing the location of the pool on the loop depth is preferable,
9695 but at the moment, the basic block information seems to be
9696 corrupt by this stage of the compilation. */
9698 rtx next = next_nonnote_insn (insn);
9700 if (next != NULL && GET_CODE (next) == CODE_LABEL)
9703 switch (GET_CODE (insn))
9706 /* It will always be better to place the table before the label, rather
9715 return base_cost - 10;
9718 return base_cost + 10;
9722 /* Find the best place in the insn stream in the range
9723 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
9724 Create the barrier by inserting a jump and add a new fix entry for
9727 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
9729 HOST_WIDE_INT count = 0;
9731 rtx from = fix->insn;
9732 /* The instruction after which we will insert the jump. */
9733 rtx selected = NULL;
9735 /* The address at which the jump instruction will be placed. */
9736 HOST_WIDE_INT selected_address;
9738 HOST_WIDE_INT max_count = max_address - fix->address;
9739 rtx label = gen_label_rtx ();
9741 selected_cost = arm_barrier_cost (from);
9742 selected_address = fix->address;
9744 while (from && count < max_count)
9749 /* This code shouldn't have been called if there was a natural barrier
9751 gcc_assert (GET_CODE (from) != BARRIER);
9753 /* Count the length of this insn. */
9754 count += get_attr_length (from);
9756 /* If there is a jump table, add its length. */
9757 tmp = is_jump_table (from);
9760 count += get_jump_table_size (tmp);
9762 /* Jump tables aren't in a basic block, so base the cost on
9763 the dispatch insn. If we select this location, we will
9764 still put the pool after the table. */
9765 new_cost = arm_barrier_cost (from);
9767 if (count < max_count
9768 && (!selected || new_cost <= selected_cost))
9771 selected_cost = new_cost;
9772 selected_address = fix->address + count;
9775 /* Continue after the dispatch table. */
9776 from = NEXT_INSN (tmp);
9780 new_cost = arm_barrier_cost (from);
9782 if (count < max_count
9783 && (!selected || new_cost <= selected_cost))
9786 selected_cost = new_cost;
9787 selected_address = fix->address + count;
9790 from = NEXT_INSN (from);
9793 /* Make sure that we found a place to insert the jump. */
9794 gcc_assert (selected);
9796 /* Create a new JUMP_INSN that branches around a barrier. */
9797 from = emit_jump_insn_after (gen_jump (label), selected);
9798 JUMP_LABEL (from) = label;
9799 barrier = emit_barrier_after (from);
9800 emit_label_after (label, barrier);
9802 /* Create a minipool barrier entry for the new barrier. */
9803 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9804 new_fix->insn = barrier;
9805 new_fix->address = selected_address;
9806 new_fix->next = fix->next;
9807 fix->next = new_fix;
9812 /* Record that there is a natural barrier in the insn stream at
9815 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9817 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9820 fix->address = address;
9823 if (minipool_fix_head != NULL)
9824 minipool_fix_tail->next = fix;
9826 minipool_fix_head = fix;
9828 minipool_fix_tail = fix;
9831 /* Record INSN, which will need fixing up to load a value from the
9832 minipool. ADDRESS is the offset of the insn since the start of the
9833 function; LOC is a pointer to the part of the insn which requires
9834 fixing; VALUE is the constant that must be loaded, which is of type
9837 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9838 enum machine_mode mode, rtx value)
9840 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9843 fix->address = address;
9846 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9848 fix->forwards = get_attr_pool_range (insn);
9849 fix->backwards = get_attr_neg_pool_range (insn);
9850 fix->minipool = NULL;
9852 /* If an insn doesn't have a range defined for it, then it isn't
9853 expecting to be reworked by this code. Better to stop now than
9854 to generate duff assembly code. */
9855 gcc_assert (fix->forwards || fix->backwards);
9857 /* If an entry requires 8-byte alignment then assume all constant pools
9858 require 4 bytes of padding. Trying to do this later on a per-pool
9859 basis is awkward because existing pool entries have to be modified. */
9860 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9866 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9867 GET_MODE_NAME (mode),
9868 INSN_UID (insn), (unsigned long) address,
9869 -1 * (long)fix->backwards, (long)fix->forwards);
9870 arm_print_value (dump_file, fix->value);
9871 fprintf (dump_file, "\n");
9874 /* Add it to the chain of fixes. */
9877 if (minipool_fix_head != NULL)
9878 minipool_fix_tail->next = fix;
9880 minipool_fix_head = fix;
9882 minipool_fix_tail = fix;
9885 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9886 Returns the number of insns needed, or 99 if we don't know how to
9889 arm_const_double_inline_cost (rtx val)
9891 rtx lowpart, highpart;
9892 enum machine_mode mode;
9894 mode = GET_MODE (val);
9896 if (mode == VOIDmode)
9899 gcc_assert (GET_MODE_SIZE (mode) == 8);
9901 lowpart = gen_lowpart (SImode, val);
9902 highpart = gen_highpart_mode (SImode, mode, val);
9904 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9905 gcc_assert (GET_CODE (highpart) == CONST_INT);
9907 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9908 NULL_RTX, NULL_RTX, 0, 0)
9909 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9910 NULL_RTX, NULL_RTX, 0, 0));
9913 /* Return true if it is worthwhile to split a 64-bit constant into two
9914 32-bit operations. This is the case if optimizing for size, or
9915 if we have load delay slots, or if one 32-bit part can be done with
9916 a single data operation. */
9918 arm_const_double_by_parts (rtx val)
9920 enum machine_mode mode = GET_MODE (val);
9923 if (optimize_size || arm_ld_sched)
9926 if (mode == VOIDmode)
9929 part = gen_highpart_mode (SImode, mode, val);
9931 gcc_assert (GET_CODE (part) == CONST_INT);
9933 if (const_ok_for_arm (INTVAL (part))
9934 || const_ok_for_arm (~INTVAL (part)))
9937 part = gen_lowpart (SImode, val);
9939 gcc_assert (GET_CODE (part) == CONST_INT);
9941 if (const_ok_for_arm (INTVAL (part))
9942 || const_ok_for_arm (~INTVAL (part)))
9948 /* Scan INSN and note any of its operands that need fixing.
9949 If DO_PUSHES is false we do not actually push any of the fixups
9950 needed. The function returns TRUE if any fixups were needed/pushed.
9951 This is used by arm_memory_load_p() which needs to know about loads
9952 of constants that will be converted into minipool loads. */
9954 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9956 bool result = false;
9959 extract_insn (insn);
9961 if (!constrain_operands (1))
9962 fatal_insn_not_found (insn);
9964 if (recog_data.n_alternatives == 0)
9967 /* Fill in recog_op_alt with information about the constraints of
9969 preprocess_constraints ();
9971 for (opno = 0; opno < recog_data.n_operands; opno++)
9973 /* Things we need to fix can only occur in inputs. */
9974 if (recog_data.operand_type[opno] != OP_IN)
9977 /* If this alternative is a memory reference, then any mention
9978 of constants in this alternative is really to fool reload
9979 into allowing us to accept one there. We need to fix them up
9980 now so that we output the right code. */
9981 if (recog_op_alt[opno][which_alternative].memory_ok)
9983 rtx op = recog_data.operand[opno];
9985 if (CONSTANT_P (op))
9988 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9989 recog_data.operand_mode[opno], op);
9992 else if (GET_CODE (op) == MEM
9993 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9994 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9998 rtx cop = avoid_constant_pool_reference (op);
10000 /* Casting the address of something to a mode narrower
10001 than a word can cause avoid_constant_pool_reference()
10002 to return the pool reference itself. That's no good to
10003 us here. Lets just hope that we can use the
10004 constant pool value directly. */
10006 cop = get_pool_constant (XEXP (op, 0));
10008 push_minipool_fix (insn, address,
10009 recog_data.operand_loc[opno],
10010 recog_data.operand_mode[opno], cop);
10021 /* Gcc puts the pool in the wrong place for ARM, since we can only
10022 load addresses a limited distance around the pc. We do some
10023 special munging to move the constant pool values to the correct
10024 point in the code. */
10029 HOST_WIDE_INT address = 0;
10032 minipool_fix_head = minipool_fix_tail = NULL;
10034 /* The first insn must always be a note, or the code below won't
10035 scan it properly. */
10036 insn = get_insns ();
10037 gcc_assert (GET_CODE (insn) == NOTE);
10040 /* Scan all the insns and record the operands that will need fixing. */
10041 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
10043 if (TARGET_CIRRUS_FIX_INVALID_INSNS
10044 && (arm_cirrus_insn_p (insn)
10045 || GET_CODE (insn) == JUMP_INSN
10046 || arm_memory_load_p (insn)))
10047 cirrus_reorg (insn);
10049 if (GET_CODE (insn) == BARRIER)
10050 push_minipool_barrier (insn, address);
10051 else if (INSN_P (insn))
10055 note_invalid_constants (insn, address, true);
10056 address += get_attr_length (insn);
10058 /* If the insn is a vector jump, add the size of the table
10059 and skip the table. */
10060 if ((table = is_jump_table (insn)) != NULL)
10062 address += get_jump_table_size (table);
10068 fix = minipool_fix_head;
10070 /* Now scan the fixups and perform the required changes. */
10075 Mfix * last_added_fix;
10076 Mfix * last_barrier = NULL;
10079 /* Skip any further barriers before the next fix. */
10080 while (fix && GET_CODE (fix->insn) == BARRIER)
10083 /* No more fixes. */
10087 last_added_fix = NULL;
10089 for (ftmp = fix; ftmp; ftmp = ftmp->next)
10091 if (GET_CODE (ftmp->insn) == BARRIER)
10093 if (ftmp->address >= minipool_vector_head->max_address)
10096 last_barrier = ftmp;
10098 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
10101 last_added_fix = ftmp; /* Keep track of the last fix added. */
10104 /* If we found a barrier, drop back to that; any fixes that we
10105 could have reached but come after the barrier will now go in
10106 the next mini-pool. */
10107 if (last_barrier != NULL)
10109 /* Reduce the refcount for those fixes that won't go into this
10111 for (fdel = last_barrier->next;
10112 fdel && fdel != ftmp;
10115 fdel->minipool->refcount--;
10116 fdel->minipool = NULL;
10119 ftmp = last_barrier;
10123 /* ftmp is first fix that we can't fit into this pool and
10124 there no natural barriers that we could use. Insert a
10125 new barrier in the code somewhere between the previous
10126 fix and this one, and arrange to jump around it. */
10127 HOST_WIDE_INT max_address;
10129 /* The last item on the list of fixes must be a barrier, so
10130 we can never run off the end of the list of fixes without
10131 last_barrier being set. */
10134 max_address = minipool_vector_head->max_address;
10135 /* Check that there isn't another fix that is in range that
10136 we couldn't fit into this pool because the pool was
10137 already too large: we need to put the pool before such an
10138 instruction. The pool itself may come just after the
10139 fix because create_fix_barrier also allows space for a
10140 jump instruction. */
10141 if (ftmp->address < max_address)
10142 max_address = ftmp->address + 1;
10144 last_barrier = create_fix_barrier (last_added_fix, max_address);
10147 assign_minipool_offsets (last_barrier);
10151 if (GET_CODE (ftmp->insn) != BARRIER
10152 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
10159 /* Scan over the fixes we have identified for this pool, fixing them
10160 up and adding the constants to the pool itself. */
10161 for (this_fix = fix; this_fix && ftmp != this_fix;
10162 this_fix = this_fix->next)
10163 if (GET_CODE (this_fix->insn) != BARRIER)
10166 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
10167 minipool_vector_label),
10168 this_fix->minipool->offset);
10169 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
10172 dump_minipool (last_barrier->insn);
10176 /* From now on we must synthesize any constants that we can't handle
10177 directly. This can happen if the RTL gets split during final
10178 instruction generation. */
10179 after_arm_reorg = 1;
10181 /* Free the minipool memory. */
10182 obstack_free (&minipool_obstack, minipool_startobj);
10185 /* Routines to output assembly language. */
10187 /* If the rtx is the correct value then return the string of the number.
10188 In this way we can ensure that valid double constants are generated even
10189 when cross compiling. */
10191 fp_immediate_constant (rtx x)
10196 if (!fp_consts_inited)
10199 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10200 for (i = 0; i < 8; i++)
10201 if (REAL_VALUES_EQUAL (r, values_fp[i]))
10202 return strings_fp[i];
10204 gcc_unreachable ();
10207 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
10208 static const char *
10209 fp_const_from_val (REAL_VALUE_TYPE *r)
10213 if (!fp_consts_inited)
10216 for (i = 0; i < 8; i++)
10217 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
10218 return strings_fp[i];
10220 gcc_unreachable ();
10223 /* Output the operands of a LDM/STM instruction to STREAM.
10224 MASK is the ARM register set mask of which only bits 0-15 are important.
10225 REG is the base register, either the frame pointer or the stack pointer,
10226 INSTR is the possibly suffixed load or store instruction.
10227 RFE is nonzero if the instruction should also copy spsr to cpsr. */
10230 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
10231 unsigned long mask, int rfe)
10234 bool not_first = FALSE;
10236 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
10237 fputc ('\t', stream);
10238 asm_fprintf (stream, instr, reg);
10239 fputc ('{', stream);
10241 for (i = 0; i <= LAST_ARM_REGNUM; i++)
10242 if (mask & (1 << i))
10245 fprintf (stream, ", ");
10247 asm_fprintf (stream, "%r", i);
10252 fprintf (stream, "}^\n");
10254 fprintf (stream, "}\n");
10258 /* Output a FLDMD instruction to STREAM.
10259 BASE if the register containing the address.
10260 REG and COUNT specify the register range.
10261 Extra registers may be added to avoid hardware bugs.
10263 We output FLDMD even for ARMv5 VFP implementations. Although
10264 FLDMD is technically not supported until ARMv6, it is believed
10265 that all VFP implementations support its use in this context. */
10268 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
10272 /* Workaround ARM10 VFPr1 bug. */
10273 if (count == 2 && !arm_arch6)
10280 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
10281 load into multiple parts if we have to handle more than 16 registers. */
10284 vfp_output_fldmd (stream, base, reg, 16);
10285 vfp_output_fldmd (stream, base, reg + 16, count - 16);
10289 fputc ('\t', stream);
10290 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
10292 for (i = reg; i < reg + count; i++)
10295 fputs (", ", stream);
10296 asm_fprintf (stream, "d%d", i);
10298 fputs ("}\n", stream);
10303 /* Output the assembly for a store multiple. */
10306 vfp_output_fstmd (rtx * operands)
10313 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
10314 p = strlen (pattern);
10316 gcc_assert (GET_CODE (operands[1]) == REG);
10318 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
10319 for (i = 1; i < XVECLEN (operands[2], 0); i++)
10321 p += sprintf (&pattern[p], ", d%d", base + i);
10323 strcpy (&pattern[p], "}");
10325 output_asm_insn (pattern, operands);
10330 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
10331 number of bytes pushed. */
10334 vfp_emit_fstmd (int base_reg, int count)
10341 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
10342 register pairs are stored by a store multiple insn. We avoid this
10343 by pushing an extra pair. */
10344 if (count == 2 && !arm_arch6)
10346 if (base_reg == LAST_VFP_REGNUM - 3)
10351 /* FSTMD may not store more than 16 doubleword registers at once. Split
10352 larger stores into multiple parts (up to a maximum of two, in
10357 /* NOTE: base_reg is an internal register number, so each D register
10359 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
10360 saved += vfp_emit_fstmd (base_reg, 16);
10364 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
10365 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
10367 reg = gen_rtx_REG (DFmode, base_reg);
10370 XVECEXP (par, 0, 0)
10371 = gen_rtx_SET (VOIDmode,
10372 gen_frame_mem (BLKmode,
10373 gen_rtx_PRE_DEC (BLKmode,
10374 stack_pointer_rtx)),
10375 gen_rtx_UNSPEC (BLKmode,
10376 gen_rtvec (1, reg),
10377 UNSPEC_PUSH_MULT));
10379 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10380 plus_constant (stack_pointer_rtx, -(count * 8)));
10381 RTX_FRAME_RELATED_P (tmp) = 1;
10382 XVECEXP (dwarf, 0, 0) = tmp;
10384 tmp = gen_rtx_SET (VOIDmode,
10385 gen_frame_mem (DFmode, stack_pointer_rtx),
10387 RTX_FRAME_RELATED_P (tmp) = 1;
10388 XVECEXP (dwarf, 0, 1) = tmp;
10390 for (i = 1; i < count; i++)
10392 reg = gen_rtx_REG (DFmode, base_reg);
10394 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
10396 tmp = gen_rtx_SET (VOIDmode,
10397 gen_frame_mem (DFmode,
10398 plus_constant (stack_pointer_rtx,
10401 RTX_FRAME_RELATED_P (tmp) = 1;
10402 XVECEXP (dwarf, 0, i + 1) = tmp;
10405 par = emit_insn (par);
10406 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
10407 RTX_FRAME_RELATED_P (par) = 1;
10412 /* Emit a call instruction with pattern PAT. ADDR is the address of
10413 the call target. */
10416 arm_emit_call_insn (rtx pat, rtx addr)
10420 insn = emit_call_insn (pat);
10422 /* The PIC register is live on entry to VxWorks PIC PLT entries.
10423 If the call might use such an entry, add a use of the PIC register
10424 to the instruction's CALL_INSN_FUNCTION_USAGE. */
10425 if (TARGET_VXWORKS_RTP
10427 && GET_CODE (addr) == SYMBOL_REF
10428 && (SYMBOL_REF_DECL (addr)
10429 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
10430 : !SYMBOL_REF_LOCAL_P (addr)))
10432 require_pic_register ();
10433 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
10437 /* Output a 'call' insn. */
10439 output_call (rtx *operands)
10441 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
10443 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
10444 if (REGNO (operands[0]) == LR_REGNUM)
10446 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
10447 output_asm_insn ("mov%?\t%0, %|lr", operands);
10450 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10452 if (TARGET_INTERWORK || arm_arch4t)
10453 output_asm_insn ("bx%?\t%0", operands);
10455 output_asm_insn ("mov%?\t%|pc, %0", operands);
10460 /* Output a 'call' insn that is a reference in memory. */
10462 output_call_mem (rtx *operands)
10464 if (TARGET_INTERWORK && !arm_arch5)
10466 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10467 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10468 output_asm_insn ("bx%?\t%|ip", operands);
10470 else if (regno_use_in (LR_REGNUM, operands[0]))
10472 /* LR is used in the memory address. We load the address in the
10473 first instruction. It's safe to use IP as the target of the
10474 load since the call will kill it anyway. */
10475 output_asm_insn ("ldr%?\t%|ip, %0", operands);
10477 output_asm_insn ("blx%?\t%|ip", operands);
10480 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10482 output_asm_insn ("bx%?\t%|ip", operands);
10484 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
10489 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
10490 output_asm_insn ("ldr%?\t%|pc, %0", operands);
10497 /* Output a move from arm registers to an fpa registers.
10498 OPERANDS[0] is an fpa register.
10499 OPERANDS[1] is the first registers of an arm register pair. */
10501 output_mov_long_double_fpa_from_arm (rtx *operands)
10503 int arm_reg0 = REGNO (operands[1]);
10506 gcc_assert (arm_reg0 != IP_REGNUM);
10508 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10509 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10510 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10512 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10513 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
10518 /* Output a move from an fpa register to arm registers.
10519 OPERANDS[0] is the first registers of an arm register pair.
10520 OPERANDS[1] is an fpa register. */
10522 output_mov_long_double_arm_from_fpa (rtx *operands)
10524 int arm_reg0 = REGNO (operands[0]);
10527 gcc_assert (arm_reg0 != IP_REGNUM);
10529 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10530 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10531 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
10533 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
10534 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
10538 /* Output a move from arm registers to arm registers of a long double
10539 OPERANDS[0] is the destination.
10540 OPERANDS[1] is the source. */
10542 output_mov_long_double_arm_from_arm (rtx *operands)
10544 /* We have to be careful here because the two might overlap. */
10545 int dest_start = REGNO (operands[0]);
10546 int src_start = REGNO (operands[1]);
10550 if (dest_start < src_start)
10552 for (i = 0; i < 3; i++)
10554 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10555 ops[1] = gen_rtx_REG (SImode, src_start + i);
10556 output_asm_insn ("mov%?\t%0, %1", ops);
10561 for (i = 2; i >= 0; i--)
10563 ops[0] = gen_rtx_REG (SImode, dest_start + i);
10564 ops[1] = gen_rtx_REG (SImode, src_start + i);
10565 output_asm_insn ("mov%?\t%0, %1", ops);
10573 /* Emit a MOVW/MOVT pair. */
10574 void arm_emit_movpair (rtx dest, rtx src)
10576 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
10577 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
10581 /* Output a move from arm registers to an fpa registers.
10582 OPERANDS[0] is an fpa register.
10583 OPERANDS[1] is the first registers of an arm register pair. */
10585 output_mov_double_fpa_from_arm (rtx *operands)
10587 int arm_reg0 = REGNO (operands[1]);
10590 gcc_assert (arm_reg0 != IP_REGNUM);
10592 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10593 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10594 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
10595 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
10599 /* Output a move from an fpa register to arm registers.
10600 OPERANDS[0] is the first registers of an arm register pair.
10601 OPERANDS[1] is an fpa register. */
10603 output_mov_double_arm_from_fpa (rtx *operands)
10605 int arm_reg0 = REGNO (operands[0]);
10608 gcc_assert (arm_reg0 != IP_REGNUM);
10610 ops[0] = gen_rtx_REG (SImode, arm_reg0);
10611 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
10612 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
10613 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
10617 /* Output a move between double words.
10618 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
10619 or MEM<-REG and all MEMs must be offsettable addresses. */
10621 output_move_double (rtx *operands)
10623 enum rtx_code code0 = GET_CODE (operands[0]);
10624 enum rtx_code code1 = GET_CODE (operands[1]);
10629 unsigned int reg0 = REGNO (operands[0]);
10631 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
10633 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
10635 switch (GET_CODE (XEXP (operands[1], 0)))
10639 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
10640 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
10642 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10646 gcc_assert (TARGET_LDRD);
10647 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
10652 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
10654 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
10659 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
10661 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
10665 gcc_assert (TARGET_LDRD);
10666 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
10671 /* Autoicrement addressing modes should never have overlapping
10672 base and destination registers, and overlapping index registers
10673 are already prohibited, so this doesn't need to worry about
10675 otherops[0] = operands[0];
10676 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
10677 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
10679 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
10681 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10683 /* Registers overlap so split out the increment. */
10684 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10685 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
10689 /* Use a single insn if we can.
10690 FIXME: IWMMXT allows offsets larger than ldrd can
10691 handle, fix these up with a pair of ldr. */
10693 || GET_CODE (otherops[2]) != CONST_INT
10694 || (INTVAL (otherops[2]) > -256
10695 && INTVAL (otherops[2]) < 256))
10696 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
10699 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10700 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10706 /* Use a single insn if we can.
10707 FIXME: IWMMXT allows offsets larger than ldrd can handle,
10708 fix these up with a pair of ldr. */
10710 || GET_CODE (otherops[2]) != CONST_INT
10711 || (INTVAL (otherops[2]) > -256
10712 && INTVAL (otherops[2]) < 256))
10713 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
10716 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10717 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10724 /* We might be able to use ldrd %0, %1 here. However the range is
10725 different to ldr/adr, and it is broken on some ARMv7-M
10726 implementations. */
10727 /* Use the second register of the pair to avoid problematic
10729 otherops[1] = operands[1];
10730 output_asm_insn ("adr%?\t%0, %1", otherops);
10731 operands[1] = otherops[0];
10733 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10735 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
10738 /* ??? This needs checking for thumb2. */
10740 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
10741 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
10743 otherops[0] = operands[0];
10744 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
10745 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
10747 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
10749 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10751 switch ((int) INTVAL (otherops[2]))
10754 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10759 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10764 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10768 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
10769 operands[1] = otherops[0];
10771 && (GET_CODE (otherops[2]) == REG
10773 || (GET_CODE (otherops[2]) == CONST_INT
10774 && INTVAL (otherops[2]) > -256
10775 && INTVAL (otherops[2]) < 256)))
10777 if (reg_overlap_mentioned_p (operands[0],
10781 /* Swap base and index registers over to
10782 avoid a conflict. */
10784 otherops[1] = otherops[2];
10787 /* If both registers conflict, it will usually
10788 have been fixed by a splitter. */
10789 if (reg_overlap_mentioned_p (operands[0], otherops[2])
10790 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
10792 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10793 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
10797 otherops[0] = operands[0];
10798 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10803 if (GET_CODE (otherops[2]) == CONST_INT)
10805 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10806 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10808 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10811 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10814 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10817 return "ldr%(d%)\t%0, [%1]";
10819 return "ldm%(ia%)\t%1, %M0";
10823 otherops[1] = adjust_address (operands[1], SImode, 4);
10824 /* Take care of overlapping base/data reg. */
10825 if (reg_mentioned_p (operands[0], operands[1]))
10827 output_asm_insn ("ldr%?\t%0, %1", otherops);
10828 output_asm_insn ("ldr%?\t%0, %1", operands);
10832 output_asm_insn ("ldr%?\t%0, %1", operands);
10833 output_asm_insn ("ldr%?\t%0, %1", otherops);
10840 /* Constraints should ensure this. */
10841 gcc_assert (code0 == MEM && code1 == REG);
10842 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10844 switch (GET_CODE (XEXP (operands[0], 0)))
10848 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10850 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10854 gcc_assert (TARGET_LDRD);
10855 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10860 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10862 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10867 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10869 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10873 gcc_assert (TARGET_LDRD);
10874 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10879 otherops[0] = operands[1];
10880 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10881 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10883 /* IWMMXT allows offsets larger than ldrd can handle,
10884 fix these up with a pair of ldr. */
10886 && GET_CODE (otherops[2]) == CONST_INT
10887 && (INTVAL(otherops[2]) <= -256
10888 || INTVAL(otherops[2]) >= 256))
10890 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10892 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10893 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10897 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
10898 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10901 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10902 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10904 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10908 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10909 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10911 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10914 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10920 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10926 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10931 && (GET_CODE (otherops[2]) == REG
10933 || (GET_CODE (otherops[2]) == CONST_INT
10934 && INTVAL (otherops[2]) > -256
10935 && INTVAL (otherops[2]) < 256)))
10937 otherops[0] = operands[1];
10938 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10939 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10945 otherops[0] = adjust_address (operands[0], SImode, 4);
10946 otherops[1] = operands[1];
10947 output_asm_insn ("str%?\t%1, %0", operands);
10948 output_asm_insn ("str%?\t%H1, %0", otherops);
10955 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10956 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
10959 output_move_quad (rtx *operands)
10961 if (REG_P (operands[0]))
10963 /* Load, or reg->reg move. */
10965 if (MEM_P (operands[1]))
10967 switch (GET_CODE (XEXP (operands[1], 0)))
10970 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10975 output_asm_insn ("adr%?\t%0, %1", operands);
10976 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10980 gcc_unreachable ();
10988 gcc_assert (REG_P (operands[1]));
10990 dest = REGNO (operands[0]);
10991 src = REGNO (operands[1]);
10993 /* This seems pretty dumb, but hopefully GCC won't try to do it
10996 for (i = 0; i < 4; i++)
10998 ops[0] = gen_rtx_REG (SImode, dest + i);
10999 ops[1] = gen_rtx_REG (SImode, src + i);
11000 output_asm_insn ("mov%?\t%0, %1", ops);
11003 for (i = 3; i >= 0; i--)
11005 ops[0] = gen_rtx_REG (SImode, dest + i);
11006 ops[1] = gen_rtx_REG (SImode, src + i);
11007 output_asm_insn ("mov%?\t%0, %1", ops);
11013 gcc_assert (MEM_P (operands[0]));
11014 gcc_assert (REG_P (operands[1]));
11015 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
11017 switch (GET_CODE (XEXP (operands[0], 0)))
11020 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
11024 gcc_unreachable ();
11031 /* Output a VFP load or store instruction. */
11034 output_move_vfp (rtx *operands)
11036 rtx reg, mem, addr, ops[2];
11037 int load = REG_P (operands[0]);
11038 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
11039 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
11042 enum machine_mode mode;
11044 reg = operands[!load];
11045 mem = operands[load];
11047 mode = GET_MODE (reg);
11049 gcc_assert (REG_P (reg));
11050 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
11051 gcc_assert (mode == SFmode
11055 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
11056 gcc_assert (MEM_P (mem));
11058 addr = XEXP (mem, 0);
11060 switch (GET_CODE (addr))
11063 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
11064 ops[0] = XEXP (addr, 0);
11069 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
11070 ops[0] = XEXP (addr, 0);
11075 templ = "f%s%c%%?\t%%%s0, %%1%s";
11081 sprintf (buff, templ,
11082 load ? "ld" : "st",
11085 integer_p ? "\t%@ int" : "");
11086 output_asm_insn (buff, ops);
11091 /* Output a Neon quad-word load or store, or a load or store for
11092 larger structure modes.
11094 WARNING: The ordering of elements is weird in big-endian mode,
11095 because we use VSTM, as required by the EABI. GCC RTL defines
11096 element ordering based on in-memory order. This can be differ
11097 from the architectural ordering of elements within a NEON register.
11098 The intrinsics defined in arm_neon.h use the NEON register element
11099 ordering, not the GCC RTL element ordering.
11101 For example, the in-memory ordering of a big-endian a quadword
11102 vector with 16-bit elements when stored from register pair {d0,d1}
11103 will be (lowest address first, d0[N] is NEON register element N):
11105 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
11107 When necessary, quadword registers (dN, dN+1) are moved to ARM
11108 registers from rN in the order:
11110 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
11112 So that STM/LDM can be used on vectors in ARM registers, and the
11113 same memory layout will result as if VSTM/VLDM were used. */
11116 output_move_neon (rtx *operands)
11118 rtx reg, mem, addr, ops[2];
11119 int regno, load = REG_P (operands[0]);
11122 enum machine_mode mode;
11124 reg = operands[!load];
11125 mem = operands[load];
11127 mode = GET_MODE (reg);
11129 gcc_assert (REG_P (reg));
11130 regno = REGNO (reg);
11131 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
11132 || NEON_REGNO_OK_FOR_QUAD (regno));
11133 gcc_assert (VALID_NEON_DREG_MODE (mode)
11134 || VALID_NEON_QREG_MODE (mode)
11135 || VALID_NEON_STRUCT_MODE (mode));
11136 gcc_assert (MEM_P (mem));
11138 addr = XEXP (mem, 0);
11140 /* Strip off const from addresses like (const (plus (...))). */
11141 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
11142 addr = XEXP (addr, 0);
11144 switch (GET_CODE (addr))
11147 templ = "v%smia%%?\t%%0!, %%h1";
11148 ops[0] = XEXP (addr, 0);
11153 /* FIXME: We should be using vld1/vst1 here in BE mode? */
11154 templ = "v%smdb%%?\t%%0!, %%h1";
11155 ops[0] = XEXP (addr, 0);
11160 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
11161 gcc_unreachable ();
11166 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
11169 for (i = 0; i < nregs; i++)
11171 /* We're only using DImode here because it's a convenient size. */
11172 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
11173 ops[1] = adjust_address (mem, SImode, 8 * i);
11174 if (reg_overlap_mentioned_p (ops[0], mem))
11176 gcc_assert (overlap == -1);
11181 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11182 output_asm_insn (buff, ops);
11187 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
11188 ops[1] = adjust_address (mem, SImode, 8 * overlap);
11189 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
11190 output_asm_insn (buff, ops);
11197 templ = "v%smia%%?\t%%m0, %%h1";
11202 sprintf (buff, templ, load ? "ld" : "st");
11203 output_asm_insn (buff, ops);
11208 /* Output an ADD r, s, #n where n may be too big for one instruction.
11209 If adding zero to one register, output nothing. */
11211 output_add_immediate (rtx *operands)
11213 HOST_WIDE_INT n = INTVAL (operands[2]);
11215 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
11218 output_multi_immediate (operands,
11219 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
11222 output_multi_immediate (operands,
11223 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
11230 /* Output a multiple immediate operation.
11231 OPERANDS is the vector of operands referred to in the output patterns.
11232 INSTR1 is the output pattern to use for the first constant.
11233 INSTR2 is the output pattern to use for subsequent constants.
11234 IMMED_OP is the index of the constant slot in OPERANDS.
11235 N is the constant value. */
11236 static const char *
11237 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
11238 int immed_op, HOST_WIDE_INT n)
11240 #if HOST_BITS_PER_WIDE_INT > 32
11246 /* Quick and easy output. */
11247 operands[immed_op] = const0_rtx;
11248 output_asm_insn (instr1, operands);
11253 const char * instr = instr1;
11255 /* Note that n is never zero here (which would give no output). */
11256 for (i = 0; i < 32; i += 2)
11260 operands[immed_op] = GEN_INT (n & (255 << i));
11261 output_asm_insn (instr, operands);
11271 /* Return the name of a shifter operation. */
11272 static const char *
11273 arm_shift_nmem(enum rtx_code code)
11278 return ARM_LSL_NAME;
11294 /* Return the appropriate ARM instruction for the operation code.
11295 The returned result should not be overwritten. OP is the rtx of the
11296 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
11299 arithmetic_instr (rtx op, int shift_first_arg)
11301 switch (GET_CODE (op))
11307 return shift_first_arg ? "rsb" : "sub";
11322 return arm_shift_nmem(GET_CODE(op));
11325 gcc_unreachable ();
11329 /* Ensure valid constant shifts and return the appropriate shift mnemonic
11330 for the operation code. The returned result should not be overwritten.
11331 OP is the rtx code of the shift.
11332 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
11334 static const char *
11335 shift_op (rtx op, HOST_WIDE_INT *amountp)
11338 enum rtx_code code = GET_CODE (op);
11340 switch (GET_CODE (XEXP (op, 1)))
11348 *amountp = INTVAL (XEXP (op, 1));
11352 gcc_unreachable ();
11358 gcc_assert (*amountp != -1);
11359 *amountp = 32 - *amountp;
11362 /* Fall through. */
11368 mnem = arm_shift_nmem(code);
11372 /* We never have to worry about the amount being other than a
11373 power of 2, since this case can never be reloaded from a reg. */
11374 gcc_assert (*amountp != -1);
11375 *amountp = int_log2 (*amountp);
11376 return ARM_LSL_NAME;
11379 gcc_unreachable ();
11382 if (*amountp != -1)
11384 /* This is not 100% correct, but follows from the desire to merge
11385 multiplication by a power of 2 with the recognizer for a
11386 shift. >=32 is not a valid shift for "lsl", so we must try and
11387 output a shift that produces the correct arithmetical result.
11388 Using lsr #32 is identical except for the fact that the carry bit
11389 is not set correctly if we set the flags; but we never use the
11390 carry bit from such an operation, so we can ignore that. */
11391 if (code == ROTATERT)
11392 /* Rotate is just modulo 32. */
11394 else if (*amountp != (*amountp & 31))
11396 if (code == ASHIFT)
11401 /* Shifts of 0 are no-ops. */
11409 /* Obtain the shift from the POWER of two. */
11411 static HOST_WIDE_INT
11412 int_log2 (HOST_WIDE_INT power)
11414 HOST_WIDE_INT shift = 0;
11416 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
11418 gcc_assert (shift <= 31);
11425 /* Output a .ascii pseudo-op, keeping track of lengths. This is
11426 because /bin/as is horribly restrictive. The judgement about
11427 whether or not each character is 'printable' (and can be output as
11428 is) or not (and must be printed with an octal escape) must be made
11429 with reference to the *host* character set -- the situation is
11430 similar to that discussed in the comments above pp_c_char in
11431 c-pretty-print.c. */
11433 #define MAX_ASCII_LEN 51
11436 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
11439 int len_so_far = 0;
11441 fputs ("\t.ascii\t\"", stream);
11443 for (i = 0; i < len; i++)
11447 if (len_so_far >= MAX_ASCII_LEN)
11449 fputs ("\"\n\t.ascii\t\"", stream);
11455 if (c == '\\' || c == '\"')
11457 putc ('\\', stream);
11465 fprintf (stream, "\\%03o", c);
11470 fputs ("\"\n", stream);
11473 /* Compute the register save mask for registers 0 through 12
11474 inclusive. This code is used by arm_compute_save_reg_mask. */
11476 static unsigned long
11477 arm_compute_save_reg0_reg12_mask (void)
11479 unsigned long func_type = arm_current_func_type ();
11480 unsigned long save_reg_mask = 0;
11483 if (IS_INTERRUPT (func_type))
11485 unsigned int max_reg;
11486 /* Interrupt functions must not corrupt any registers,
11487 even call clobbered ones. If this is a leaf function
11488 we can just examine the registers used by the RTL, but
11489 otherwise we have to assume that whatever function is
11490 called might clobber anything, and so we have to save
11491 all the call-clobbered registers as well. */
11492 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
11493 /* FIQ handlers have registers r8 - r12 banked, so
11494 we only need to check r0 - r7, Normal ISRs only
11495 bank r14 and r15, so we must check up to r12.
11496 r13 is the stack pointer which is always preserved,
11497 so we do not need to consider it here. */
11502 for (reg = 0; reg <= max_reg; reg++)
11503 if (df_regs_ever_live_p (reg)
11504 || (! current_function_is_leaf && call_used_regs[reg]))
11505 save_reg_mask |= (1 << reg);
11507 /* Also save the pic base register if necessary. */
11509 && !TARGET_SINGLE_PIC_BASE
11510 && arm_pic_register != INVALID_REGNUM
11511 && crtl->uses_pic_offset_table)
11512 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11516 /* In the normal case we only need to save those registers
11517 which are call saved and which are used by this function. */
11518 for (reg = 0; reg <= 11; reg++)
11519 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
11520 save_reg_mask |= (1 << reg);
11522 /* Handle the frame pointer as a special case. */
11523 if (frame_pointer_needed)
11524 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
11526 /* If we aren't loading the PIC register,
11527 don't stack it even though it may be live. */
11529 && !TARGET_SINGLE_PIC_BASE
11530 && arm_pic_register != INVALID_REGNUM
11531 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
11532 || crtl->uses_pic_offset_table))
11533 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11535 /* The prologue will copy SP into R0, so save it. */
11536 if (IS_STACKALIGN (func_type))
11537 save_reg_mask |= 1;
11540 /* Save registers so the exception handler can modify them. */
11541 if (crtl->calls_eh_return)
11547 reg = EH_RETURN_DATA_REGNO (i);
11548 if (reg == INVALID_REGNUM)
11550 save_reg_mask |= 1 << reg;
11554 return save_reg_mask;
11558 /* Compute the number of bytes used to store the static chain register on the
11559 stack, above the stack frame. We need to know this accurately to get the
11560 alignment of the rest of the stack frame correct. */
11562 static int arm_compute_static_chain_stack_bytes (void)
11564 unsigned long func_type = arm_current_func_type ();
11565 int static_chain_stack_bytes = 0;
11567 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
11568 IS_NESTED (func_type) &&
11569 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
11570 static_chain_stack_bytes = 4;
11572 return static_chain_stack_bytes;
11576 /* Compute a bit mask of which registers need to be
11577 saved on the stack for the current function.
11578 This is used by arm_get_frame_offsets, which may add extra registers. */
11580 static unsigned long
11581 arm_compute_save_reg_mask (void)
11583 unsigned int save_reg_mask = 0;
11584 unsigned long func_type = arm_current_func_type ();
11587 if (IS_NAKED (func_type))
11588 /* This should never really happen. */
11591 /* If we are creating a stack frame, then we must save the frame pointer,
11592 IP (which will hold the old stack pointer), LR and the PC. */
11593 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11595 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
11598 | (1 << PC_REGNUM);
11600 /* Volatile functions do not return, so there
11601 is no need to save any other registers. */
11602 if (IS_VOLATILE (func_type))
11603 return save_reg_mask;
11605 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
11607 /* Decide if we need to save the link register.
11608 Interrupt routines have their own banked link register,
11609 so they never need to save it.
11610 Otherwise if we do not use the link register we do not need to save
11611 it. If we are pushing other registers onto the stack however, we
11612 can save an instruction in the epilogue by pushing the link register
11613 now and then popping it back into the PC. This incurs extra memory
11614 accesses though, so we only do it when optimizing for size, and only
11615 if we know that we will not need a fancy return sequence. */
11616 if (df_regs_ever_live_p (LR_REGNUM)
11619 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11620 && !crtl->calls_eh_return))
11621 save_reg_mask |= 1 << LR_REGNUM;
11623 if (cfun->machine->lr_save_eliminated)
11624 save_reg_mask &= ~ (1 << LR_REGNUM);
11626 if (TARGET_REALLY_IWMMXT
11627 && ((bit_count (save_reg_mask)
11628 + ARM_NUM_INTS (crtl->args.pretend_args_size +
11629 arm_compute_static_chain_stack_bytes())
11632 /* The total number of registers that are going to be pushed
11633 onto the stack is odd. We need to ensure that the stack
11634 is 64-bit aligned before we start to save iWMMXt registers,
11635 and also before we start to create locals. (A local variable
11636 might be a double or long long which we will load/store using
11637 an iWMMXt instruction). Therefore we need to push another
11638 ARM register, so that the stack will be 64-bit aligned. We
11639 try to avoid using the arg registers (r0 -r3) as they might be
11640 used to pass values in a tail call. */
11641 for (reg = 4; reg <= 12; reg++)
11642 if ((save_reg_mask & (1 << reg)) == 0)
11646 save_reg_mask |= (1 << reg);
11649 cfun->machine->sibcall_blocked = 1;
11650 save_reg_mask |= (1 << 3);
11654 /* We may need to push an additional register for use initializing the
11655 PIC base register. */
11656 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
11657 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
11659 reg = thumb_find_work_register (1 << 4);
11660 if (!call_used_regs[reg])
11661 save_reg_mask |= (1 << reg);
11664 return save_reg_mask;
11668 /* Compute a bit mask of which registers need to be
11669 saved on the stack for the current function. */
11670 static unsigned long
11671 thumb1_compute_save_reg_mask (void)
11673 unsigned long mask;
11677 for (reg = 0; reg < 12; reg ++)
11678 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11682 && !TARGET_SINGLE_PIC_BASE
11683 && arm_pic_register != INVALID_REGNUM
11684 && crtl->uses_pic_offset_table)
11685 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
11687 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
11688 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
11689 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
11691 /* LR will also be pushed if any lo regs are pushed. */
11692 if (mask & 0xff || thumb_force_lr_save ())
11693 mask |= (1 << LR_REGNUM);
11695 /* Make sure we have a low work register if we need one.
11696 We will need one if we are going to push a high register,
11697 but we are not currently intending to push a low register. */
11698 if ((mask & 0xff) == 0
11699 && ((mask & 0x0f00) || TARGET_BACKTRACE))
11701 /* Use thumb_find_work_register to choose which register
11702 we will use. If the register is live then we will
11703 have to push it. Use LAST_LO_REGNUM as our fallback
11704 choice for the register to select. */
11705 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
11706 /* Make sure the register returned by thumb_find_work_register is
11707 not part of the return value. */
11708 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
11709 reg = LAST_LO_REGNUM;
11711 if (! call_used_regs[reg])
11715 /* The 504 below is 8 bytes less than 512 because there are two possible
11716 alignment words. We can't tell here if they will be present or not so we
11717 have to play it safe and assume that they are. */
11718 if ((CALLER_INTERWORKING_SLOT_SIZE +
11719 ROUND_UP_WORD (get_frame_size ()) +
11720 crtl->outgoing_args_size) >= 504)
11722 /* This is the same as the code in thumb1_expand_prologue() which
11723 determines which register to use for stack decrement. */
11724 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
11725 if (mask & (1 << reg))
11728 if (reg > LAST_LO_REGNUM)
11730 /* Make sure we have a register available for stack decrement. */
11731 mask |= 1 << LAST_LO_REGNUM;
11739 /* Return the number of bytes required to save VFP registers. */
11741 arm_get_vfp_saved_size (void)
11743 unsigned int regno;
11748 /* Space for saved VFP registers. */
11749 if (TARGET_HARD_FLOAT && TARGET_VFP)
11752 for (regno = FIRST_VFP_REGNUM;
11753 regno < LAST_VFP_REGNUM;
11756 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
11757 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
11761 /* Workaround ARM10 VFPr1 bug. */
11762 if (count == 2 && !arm_arch6)
11764 saved += count * 8;
11773 if (count == 2 && !arm_arch6)
11775 saved += count * 8;
11782 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
11783 everything bar the final return instruction. */
11785 output_return_instruction (rtx operand, int really_return, int reverse)
11787 char conditional[10];
11790 unsigned long live_regs_mask;
11791 unsigned long func_type;
11792 arm_stack_offsets *offsets;
11794 func_type = arm_current_func_type ();
11796 if (IS_NAKED (func_type))
11799 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11801 /* If this function was declared non-returning, and we have
11802 found a tail call, then we have to trust that the called
11803 function won't return. */
11808 /* Otherwise, trap an attempted return by aborting. */
11810 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11812 assemble_external_libcall (ops[1]);
11813 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11819 gcc_assert (!cfun->calls_alloca || really_return);
11821 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11823 cfun->machine->return_used_this_function = 1;
11825 offsets = arm_get_frame_offsets ();
11826 live_regs_mask = offsets->saved_regs_mask;
11828 if (live_regs_mask)
11830 const char * return_reg;
11832 /* If we do not have any special requirements for function exit
11833 (e.g. interworking) then we can load the return address
11834 directly into the PC. Otherwise we must load it into LR. */
11836 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11837 return_reg = reg_names[PC_REGNUM];
11839 return_reg = reg_names[LR_REGNUM];
11841 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11843 /* There are three possible reasons for the IP register
11844 being saved. 1) a stack frame was created, in which case
11845 IP contains the old stack pointer, or 2) an ISR routine
11846 corrupted it, or 3) it was saved to align the stack on
11847 iWMMXt. In case 1, restore IP into SP, otherwise just
11849 if (frame_pointer_needed)
11851 live_regs_mask &= ~ (1 << IP_REGNUM);
11852 live_regs_mask |= (1 << SP_REGNUM);
11855 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11858 /* On some ARM architectures it is faster to use LDR rather than
11859 LDM to load a single register. On other architectures, the
11860 cost is the same. In 26 bit mode, or for exception handlers,
11861 we have to use LDM to load the PC so that the CPSR is also
11863 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11864 if (live_regs_mask == (1U << reg))
11867 if (reg <= LAST_ARM_REGNUM
11868 && (reg != LR_REGNUM
11870 || ! IS_INTERRUPT (func_type)))
11872 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11873 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11880 /* Generate the load multiple instruction to restore the
11881 registers. Note we can get here, even if
11882 frame_pointer_needed is true, but only if sp already
11883 points to the base of the saved core registers. */
11884 if (live_regs_mask & (1 << SP_REGNUM))
11886 unsigned HOST_WIDE_INT stack_adjust;
11888 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11889 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11891 if (stack_adjust && arm_arch5 && TARGET_ARM)
11892 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11895 /* If we can't use ldmib (SA110 bug),
11896 then try to pop r3 instead. */
11898 live_regs_mask |= 1 << 3;
11899 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11903 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11905 p = instr + strlen (instr);
11907 for (reg = 0; reg <= SP_REGNUM; reg++)
11908 if (live_regs_mask & (1 << reg))
11910 int l = strlen (reg_names[reg]);
11916 memcpy (p, ", ", 2);
11920 memcpy (p, "%|", 2);
11921 memcpy (p + 2, reg_names[reg], l);
11925 if (live_regs_mask & (1 << LR_REGNUM))
11927 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11928 /* If returning from an interrupt, restore the CPSR. */
11929 if (IS_INTERRUPT (func_type))
11936 output_asm_insn (instr, & operand);
11938 /* See if we need to generate an extra instruction to
11939 perform the actual function return. */
11941 && func_type != ARM_FT_INTERWORKED
11942 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11944 /* The return has already been handled
11945 by loading the LR into the PC. */
11952 switch ((int) ARM_FUNC_TYPE (func_type))
11956 /* ??? This is wrong for unified assembly syntax. */
11957 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11960 case ARM_FT_INTERWORKED:
11961 sprintf (instr, "bx%s\t%%|lr", conditional);
11964 case ARM_FT_EXCEPTION:
11965 /* ??? This is wrong for unified assembly syntax. */
11966 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11970 /* Use bx if it's available. */
11971 if (arm_arch5 || arm_arch4t)
11972 sprintf (instr, "bx%s\t%%|lr", conditional);
11974 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11978 output_asm_insn (instr, & operand);
11984 /* Write the function name into the code section, directly preceding
11985 the function prologue.
11987 Code will be output similar to this:
11989 .ascii "arm_poke_function_name", 0
11992 .word 0xff000000 + (t1 - t0)
11993 arm_poke_function_name
11995 stmfd sp!, {fp, ip, lr, pc}
11998 When performing a stack backtrace, code can inspect the value
11999 of 'pc' stored at 'fp' + 0. If the trace function then looks
12000 at location pc - 12 and the top 8 bits are set, then we know
12001 that there is a function name embedded immediately preceding this
12002 location and has length ((pc[-3]) & 0xff000000).
12004 We assume that pc is declared as a pointer to an unsigned long.
12006 It is of no benefit to output the function name if we are assembling
12007 a leaf function. These function types will not contain a stack
12008 backtrace structure, therefore it is not possible to determine the
12011 arm_poke_function_name (FILE *stream, const char *name)
12013 unsigned long alignlength;
12014 unsigned long length;
12017 length = strlen (name) + 1;
12018 alignlength = ROUND_UP_WORD (length);
12020 ASM_OUTPUT_ASCII (stream, name, length);
12021 ASM_OUTPUT_ALIGN (stream, 2);
12022 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
12023 assemble_aligned_integer (UNITS_PER_WORD, x);
12026 /* Place some comments into the assembler stream
12027 describing the current function. */
12029 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
12031 unsigned long func_type;
12035 thumb1_output_function_prologue (f, frame_size);
12039 /* Sanity check. */
12040 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
12042 func_type = arm_current_func_type ();
12044 switch ((int) ARM_FUNC_TYPE (func_type))
12047 case ARM_FT_NORMAL:
12049 case ARM_FT_INTERWORKED:
12050 asm_fprintf (f, "\t%@ Function supports interworking.\n");
12053 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
12056 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
12058 case ARM_FT_EXCEPTION:
12059 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
12063 if (IS_NAKED (func_type))
12064 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
12066 if (IS_VOLATILE (func_type))
12067 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
12069 if (IS_NESTED (func_type))
12070 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
12071 if (IS_STACKALIGN (func_type))
12072 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
12074 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
12076 crtl->args.pretend_args_size, frame_size);
12078 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
12079 frame_pointer_needed,
12080 cfun->machine->uses_anonymous_args);
12082 if (cfun->machine->lr_save_eliminated)
12083 asm_fprintf (f, "\t%@ link register save eliminated.\n");
12085 if (crtl->calls_eh_return)
12086 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
12091 arm_output_epilogue (rtx sibling)
12094 unsigned long saved_regs_mask;
12095 unsigned long func_type;
12096 /* Floats_offset is the offset from the "virtual" frame. In an APCS
12097 frame that is $fp + 4 for a non-variadic function. */
12098 int floats_offset = 0;
12100 FILE * f = asm_out_file;
12101 unsigned int lrm_count = 0;
12102 int really_return = (sibling == NULL);
12104 arm_stack_offsets *offsets;
12106 /* If we have already generated the return instruction
12107 then it is futile to generate anything else. */
12108 if (use_return_insn (FALSE, sibling) &&
12109 (cfun->machine->return_used_this_function != 0))
12112 func_type = arm_current_func_type ();
12114 if (IS_NAKED (func_type))
12115 /* Naked functions don't have epilogues. */
12118 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
12122 /* A volatile function should never return. Call abort. */
12123 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
12124 assemble_external_libcall (op);
12125 output_asm_insn ("bl\t%a0", &op);
12130 /* If we are throwing an exception, then we really must be doing a
12131 return, so we can't tail-call. */
12132 gcc_assert (!crtl->calls_eh_return || really_return);
12134 offsets = arm_get_frame_offsets ();
12135 saved_regs_mask = offsets->saved_regs_mask;
12138 lrm_count = bit_count (saved_regs_mask);
12140 floats_offset = offsets->saved_args;
12141 /* Compute how far away the floats will be. */
12142 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
12143 if (saved_regs_mask & (1 << reg))
12144 floats_offset += 4;
12146 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12148 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
12149 int vfp_offset = offsets->frame;
12151 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12153 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12154 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12156 floats_offset += 12;
12157 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
12158 reg, FP_REGNUM, floats_offset - vfp_offset);
12163 start_reg = LAST_FPA_REGNUM;
12165 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12167 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12169 floats_offset += 12;
12171 /* We can't unstack more than four registers at once. */
12172 if (start_reg - reg == 3)
12174 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
12175 reg, FP_REGNUM, floats_offset - vfp_offset);
12176 start_reg = reg - 1;
12181 if (reg != start_reg)
12182 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12183 reg + 1, start_reg - reg,
12184 FP_REGNUM, floats_offset - vfp_offset);
12185 start_reg = reg - 1;
12189 /* Just in case the last register checked also needs unstacking. */
12190 if (reg != start_reg)
12191 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
12192 reg + 1, start_reg - reg,
12193 FP_REGNUM, floats_offset - vfp_offset);
12196 if (TARGET_HARD_FLOAT && TARGET_VFP)
12200 /* The fldmd insns do not have base+offset addressing
12201 modes, so we use IP to hold the address. */
12202 saved_size = arm_get_vfp_saved_size ();
12204 if (saved_size > 0)
12206 floats_offset += saved_size;
12207 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
12208 FP_REGNUM, floats_offset - vfp_offset);
12210 start_reg = FIRST_VFP_REGNUM;
12211 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12213 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12214 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12216 if (start_reg != reg)
12217 vfp_output_fldmd (f, IP_REGNUM,
12218 (start_reg - FIRST_VFP_REGNUM) / 2,
12219 (reg - start_reg) / 2);
12220 start_reg = reg + 2;
12223 if (start_reg != reg)
12224 vfp_output_fldmd (f, IP_REGNUM,
12225 (start_reg - FIRST_VFP_REGNUM) / 2,
12226 (reg - start_reg) / 2);
12231 /* The frame pointer is guaranteed to be non-double-word aligned.
12232 This is because it is set to (old_stack_pointer - 4) and the
12233 old_stack_pointer was double word aligned. Thus the offset to
12234 the iWMMXt registers to be loaded must also be non-double-word
12235 sized, so that the resultant address *is* double-word aligned.
12236 We can ignore floats_offset since that was already included in
12237 the live_regs_mask. */
12238 lrm_count += (lrm_count % 2 ? 2 : 1);
12240 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12241 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12243 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
12244 reg, FP_REGNUM, lrm_count * 4);
12249 /* saved_regs_mask should contain the IP, which at the time of stack
12250 frame generation actually contains the old stack pointer. So a
12251 quick way to unwind the stack is just pop the IP register directly
12252 into the stack pointer. */
12253 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
12254 saved_regs_mask &= ~ (1 << IP_REGNUM);
12255 saved_regs_mask |= (1 << SP_REGNUM);
12257 /* There are two registers left in saved_regs_mask - LR and PC. We
12258 only need to restore the LR register (the return address), but to
12259 save time we can load it directly into the PC, unless we need a
12260 special function exit sequence, or we are not really returning. */
12262 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
12263 && !crtl->calls_eh_return)
12264 /* Delete the LR from the register mask, so that the LR on
12265 the stack is loaded into the PC in the register mask. */
12266 saved_regs_mask &= ~ (1 << LR_REGNUM);
12268 saved_regs_mask &= ~ (1 << PC_REGNUM);
12270 /* We must use SP as the base register, because SP is one of the
12271 registers being restored. If an interrupt or page fault
12272 happens in the ldm instruction, the SP might or might not
12273 have been restored. That would be bad, as then SP will no
12274 longer indicate the safe area of stack, and we can get stack
12275 corruption. Using SP as the base register means that it will
12276 be reset correctly to the original value, should an interrupt
12277 occur. If the stack pointer already points at the right
12278 place, then omit the subtraction. */
12279 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
12280 || cfun->calls_alloca)
12281 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
12282 4 * bit_count (saved_regs_mask));
12283 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
12285 if (IS_INTERRUPT (func_type))
12286 /* Interrupt handlers will have pushed the
12287 IP onto the stack, so restore it now. */
12288 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
12292 /* This branch is executed for ARM mode (non-apcs frames) and
12293 Thumb-2 mode. Frame layout is essentially the same for those
12294 cases, except that in ARM mode frame pointer points to the
12295 first saved register, while in Thumb-2 mode the frame pointer points
12296 to the last saved register.
12298 It is possible to make frame pointer point to last saved
12299 register in both cases, and remove some conditionals below.
12300 That means that fp setup in prologue would be just "mov fp, sp"
12301 and sp restore in epilogue would be just "mov sp, fp", whereas
12302 now we have to use add/sub in those cases. However, the value
12303 of that would be marginal, as both mov and add/sub are 32-bit
12304 in ARM mode, and it would require extra conditionals
12305 in arm_expand_prologue to distingish ARM-apcs-frame case
12306 (where frame pointer is required to point at first register)
12307 and ARM-non-apcs-frame. Therefore, such change is postponed
12308 until real need arise. */
12309 unsigned HOST_WIDE_INT amount;
12311 /* Restore stack pointer if necessary. */
12312 if (TARGET_ARM && frame_pointer_needed)
12314 operands[0] = stack_pointer_rtx;
12315 operands[1] = hard_frame_pointer_rtx;
12317 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
12318 output_add_immediate (operands);
12322 if (frame_pointer_needed)
12324 /* For Thumb-2 restore sp from the frame pointer.
12325 Operand restrictions mean we have to incrememnt FP, then copy
12327 amount = offsets->locals_base - offsets->saved_regs;
12328 operands[0] = hard_frame_pointer_rtx;
12332 unsigned long count;
12333 operands[0] = stack_pointer_rtx;
12334 amount = offsets->outgoing_args - offsets->saved_regs;
12335 /* pop call clobbered registers if it avoids a
12336 separate stack adjustment. */
12337 count = offsets->saved_regs - offsets->saved_args;
12340 && !crtl->calls_eh_return
12341 && bit_count(saved_regs_mask) * 4 == count
12342 && !IS_INTERRUPT (func_type)
12343 && !crtl->tail_call_emit)
12345 unsigned long mask;
12346 mask = (1 << (arm_size_return_regs() / 4)) - 1;
12348 mask &= ~saved_regs_mask;
12350 while (bit_count (mask) * 4 > amount)
12352 while ((mask & (1 << reg)) == 0)
12354 mask &= ~(1 << reg);
12356 if (bit_count (mask) * 4 == amount) {
12358 saved_regs_mask |= mask;
12365 operands[1] = operands[0];
12366 operands[2] = GEN_INT (amount);
12367 output_add_immediate (operands);
12369 if (frame_pointer_needed)
12370 asm_fprintf (f, "\tmov\t%r, %r\n",
12371 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
12374 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12376 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12377 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12378 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
12383 start_reg = FIRST_FPA_REGNUM;
12385 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
12387 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12389 if (reg - start_reg == 3)
12391 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
12392 start_reg, SP_REGNUM);
12393 start_reg = reg + 1;
12398 if (reg != start_reg)
12399 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12400 start_reg, reg - start_reg,
12403 start_reg = reg + 1;
12407 /* Just in case the last register checked also needs unstacking. */
12408 if (reg != start_reg)
12409 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
12410 start_reg, reg - start_reg, SP_REGNUM);
12413 if (TARGET_HARD_FLOAT && TARGET_VFP)
12415 start_reg = FIRST_VFP_REGNUM;
12416 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12418 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12419 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12421 if (start_reg != reg)
12422 vfp_output_fldmd (f, SP_REGNUM,
12423 (start_reg - FIRST_VFP_REGNUM) / 2,
12424 (reg - start_reg) / 2);
12425 start_reg = reg + 2;
12428 if (start_reg != reg)
12429 vfp_output_fldmd (f, SP_REGNUM,
12430 (start_reg - FIRST_VFP_REGNUM) / 2,
12431 (reg - start_reg) / 2);
12434 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
12435 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12436 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
12438 /* If we can, restore the LR into the PC. */
12439 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
12440 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
12441 && !IS_STACKALIGN (func_type)
12443 && crtl->args.pretend_args_size == 0
12444 && saved_regs_mask & (1 << LR_REGNUM)
12445 && !crtl->calls_eh_return)
12447 saved_regs_mask &= ~ (1 << LR_REGNUM);
12448 saved_regs_mask |= (1 << PC_REGNUM);
12449 rfe = IS_INTERRUPT (func_type);
12454 /* Load the registers off the stack. If we only have one register
12455 to load use the LDR instruction - it is faster. For Thumb-2
12456 always use pop and the assembler will pick the best instruction.*/
12457 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
12458 && !IS_INTERRUPT(func_type))
12460 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
12462 else if (saved_regs_mask)
12464 if (saved_regs_mask & (1 << SP_REGNUM))
12465 /* Note - write back to the stack register is not enabled
12466 (i.e. "ldmfd sp!..."). We know that the stack pointer is
12467 in the list of registers and if we add writeback the
12468 instruction becomes UNPREDICTABLE. */
12469 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
12471 else if (TARGET_ARM)
12472 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
12475 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
12478 if (crtl->args.pretend_args_size)
12480 /* Unwind the pre-pushed regs. */
12481 operands[0] = operands[1] = stack_pointer_rtx;
12482 operands[2] = GEN_INT (crtl->args.pretend_args_size);
12483 output_add_immediate (operands);
12487 /* We may have already restored PC directly from the stack. */
12488 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
12491 /* Stack adjustment for exception handler. */
12492 if (crtl->calls_eh_return)
12493 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
12494 ARM_EH_STACKADJ_REGNUM);
12496 /* Generate the return instruction. */
12497 switch ((int) ARM_FUNC_TYPE (func_type))
12501 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
12504 case ARM_FT_EXCEPTION:
12505 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12508 case ARM_FT_INTERWORKED:
12509 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12513 if (IS_STACKALIGN (func_type))
12515 /* See comment in arm_expand_prologue. */
12516 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
12518 if (arm_arch5 || arm_arch4t)
12519 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
12521 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
12529 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
12530 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
12532 arm_stack_offsets *offsets;
12538 /* Emit any call-via-reg trampolines that are needed for v4t support
12539 of call_reg and call_value_reg type insns. */
12540 for (regno = 0; regno < LR_REGNUM; regno++)
12542 rtx label = cfun->machine->call_via[regno];
12546 switch_to_section (function_section (current_function_decl));
12547 targetm.asm_out.internal_label (asm_out_file, "L",
12548 CODE_LABEL_NUMBER (label));
12549 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
12553 /* ??? Probably not safe to set this here, since it assumes that a
12554 function will be emitted as assembly immediately after we generate
12555 RTL for it. This does not happen for inline functions. */
12556 cfun->machine->return_used_this_function = 0;
12558 else /* TARGET_32BIT */
12560 /* We need to take into account any stack-frame rounding. */
12561 offsets = arm_get_frame_offsets ();
12563 gcc_assert (!use_return_insn (FALSE, NULL)
12564 || (cfun->machine->return_used_this_function != 0)
12565 || offsets->saved_regs == offsets->outgoing_args
12566 || frame_pointer_needed);
12568 /* Reset the ARM-specific per-function variables. */
12569 after_arm_reorg = 0;
12573 /* Generate and emit an insn that we will recognize as a push_multi.
12574 Unfortunately, since this insn does not reflect very well the actual
12575 semantics of the operation, we need to annotate the insn for the benefit
12576 of DWARF2 frame unwind information. */
12578 emit_multi_reg_push (unsigned long mask)
12581 int num_dwarf_regs;
12585 int dwarf_par_index;
12588 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12589 if (mask & (1 << i))
12592 gcc_assert (num_regs && num_regs <= 16);
12594 /* We don't record the PC in the dwarf frame information. */
12595 num_dwarf_regs = num_regs;
12596 if (mask & (1 << PC_REGNUM))
12599 /* For the body of the insn we are going to generate an UNSPEC in
12600 parallel with several USEs. This allows the insn to be recognized
12601 by the push_multi pattern in the arm.md file. The insn looks
12602 something like this:
12605 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
12606 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
12607 (use (reg:SI 11 fp))
12608 (use (reg:SI 12 ip))
12609 (use (reg:SI 14 lr))
12610 (use (reg:SI 15 pc))
12613 For the frame note however, we try to be more explicit and actually
12614 show each register being stored into the stack frame, plus a (single)
12615 decrement of the stack pointer. We do it this way in order to be
12616 friendly to the stack unwinding code, which only wants to see a single
12617 stack decrement per instruction. The RTL we generate for the note looks
12618 something like this:
12621 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
12622 (set (mem:SI (reg:SI sp)) (reg:SI r4))
12623 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
12624 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
12625 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
12628 This sequence is used both by the code to support stack unwinding for
12629 exceptions handlers and the code to generate dwarf2 frame debugging. */
12631 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
12632 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
12633 dwarf_par_index = 1;
12635 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12637 if (mask & (1 << i))
12639 reg = gen_rtx_REG (SImode, i);
12641 XVECEXP (par, 0, 0)
12642 = gen_rtx_SET (VOIDmode,
12643 gen_frame_mem (BLKmode,
12644 gen_rtx_PRE_DEC (BLKmode,
12645 stack_pointer_rtx)),
12646 gen_rtx_UNSPEC (BLKmode,
12647 gen_rtvec (1, reg),
12648 UNSPEC_PUSH_MULT));
12650 if (i != PC_REGNUM)
12652 tmp = gen_rtx_SET (VOIDmode,
12653 gen_frame_mem (SImode, stack_pointer_rtx),
12655 RTX_FRAME_RELATED_P (tmp) = 1;
12656 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
12664 for (j = 1, i++; j < num_regs; i++)
12666 if (mask & (1 << i))
12668 reg = gen_rtx_REG (SImode, i);
12670 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
12672 if (i != PC_REGNUM)
12675 = gen_rtx_SET (VOIDmode,
12676 gen_frame_mem (SImode,
12677 plus_constant (stack_pointer_rtx,
12680 RTX_FRAME_RELATED_P (tmp) = 1;
12681 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
12688 par = emit_insn (par);
12690 tmp = gen_rtx_SET (VOIDmode,
12692 plus_constant (stack_pointer_rtx, -4 * num_regs));
12693 RTX_FRAME_RELATED_P (tmp) = 1;
12694 XVECEXP (dwarf, 0, 0) = tmp;
12696 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12701 /* Calculate the size of the return value that is passed in registers. */
12703 arm_size_return_regs (void)
12705 enum machine_mode mode;
12707 if (crtl->return_rtx != 0)
12708 mode = GET_MODE (crtl->return_rtx);
12710 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12712 return GET_MODE_SIZE (mode);
12716 emit_sfm (int base_reg, int count)
12723 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12724 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12726 reg = gen_rtx_REG (XFmode, base_reg++);
12728 XVECEXP (par, 0, 0)
12729 = gen_rtx_SET (VOIDmode,
12730 gen_frame_mem (BLKmode,
12731 gen_rtx_PRE_DEC (BLKmode,
12732 stack_pointer_rtx)),
12733 gen_rtx_UNSPEC (BLKmode,
12734 gen_rtvec (1, reg),
12735 UNSPEC_PUSH_MULT));
12736 tmp = gen_rtx_SET (VOIDmode,
12737 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
12738 RTX_FRAME_RELATED_P (tmp) = 1;
12739 XVECEXP (dwarf, 0, 1) = tmp;
12741 for (i = 1; i < count; i++)
12743 reg = gen_rtx_REG (XFmode, base_reg++);
12744 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12746 tmp = gen_rtx_SET (VOIDmode,
12747 gen_frame_mem (XFmode,
12748 plus_constant (stack_pointer_rtx,
12751 RTX_FRAME_RELATED_P (tmp) = 1;
12752 XVECEXP (dwarf, 0, i + 1) = tmp;
12755 tmp = gen_rtx_SET (VOIDmode,
12757 plus_constant (stack_pointer_rtx, -12 * count));
12759 RTX_FRAME_RELATED_P (tmp) = 1;
12760 XVECEXP (dwarf, 0, 0) = tmp;
12762 par = emit_insn (par);
12763 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12769 /* Return true if the current function needs to save/restore LR. */
12772 thumb_force_lr_save (void)
12774 return !cfun->machine->lr_save_eliminated
12775 && (!leaf_function_p ()
12776 || thumb_far_jump_used_p ()
12777 || df_regs_ever_live_p (LR_REGNUM));
12781 /* Compute the distance from register FROM to register TO.
12782 These can be the arg pointer (26), the soft frame pointer (25),
12783 the stack pointer (13) or the hard frame pointer (11).
12784 In thumb mode r7 is used as the soft frame pointer, if needed.
12785 Typical stack layout looks like this:
12787 old stack pointer -> | |
12790 | | saved arguments for
12791 | | vararg functions
12794 hard FP & arg pointer -> | | \
12802 soft frame pointer -> | | /
12807 locals base pointer -> | | /
12812 current stack pointer -> | | /
12815 For a given function some or all of these stack components
12816 may not be needed, giving rise to the possibility of
12817 eliminating some of the registers.
12819 The values returned by this function must reflect the behavior
12820 of arm_expand_prologue() and arm_compute_save_reg_mask().
12822 The sign of the number returned reflects the direction of stack
12823 growth, so the values are positive for all eliminations except
12824 from the soft frame pointer to the hard frame pointer.
12826 SFP may point just inside the local variables block to ensure correct
12830 /* Calculate stack offsets. These are used to calculate register elimination
12831 offsets and in prologue/epilogue code. Also calculates which registers
12832 should be saved. */
12834 static arm_stack_offsets *
12835 arm_get_frame_offsets (void)
12837 struct arm_stack_offsets *offsets;
12838 unsigned long func_type;
12842 HOST_WIDE_INT frame_size;
12845 offsets = &cfun->machine->stack_offsets;
12847 /* We need to know if we are a leaf function. Unfortunately, it
12848 is possible to be called after start_sequence has been called,
12849 which causes get_insns to return the insns for the sequence,
12850 not the function, which will cause leaf_function_p to return
12851 the incorrect result.
12853 to know about leaf functions once reload has completed, and the
12854 frame size cannot be changed after that time, so we can safely
12855 use the cached value. */
12857 if (reload_completed)
12860 /* Initially this is the size of the local variables. It will translated
12861 into an offset once we have determined the size of preceding data. */
12862 frame_size = ROUND_UP_WORD (get_frame_size ());
12864 leaf = leaf_function_p ();
12866 /* Space for variadic functions. */
12867 offsets->saved_args = crtl->args.pretend_args_size;
12869 /* In Thumb mode this is incorrect, but never used. */
12870 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
12871 arm_compute_static_chain_stack_bytes();
12875 unsigned int regno;
12877 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12878 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12879 saved = core_saved;
12881 /* We know that SP will be doubleword aligned on entry, and we must
12882 preserve that condition at any subroutine call. We also require the
12883 soft frame pointer to be doubleword aligned. */
12885 if (TARGET_REALLY_IWMMXT)
12887 /* Check for the call-saved iWMMXt registers. */
12888 for (regno = FIRST_IWMMXT_REGNUM;
12889 regno <= LAST_IWMMXT_REGNUM;
12891 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12895 func_type = arm_current_func_type ();
12896 if (! IS_VOLATILE (func_type))
12898 /* Space for saved FPA registers. */
12899 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12900 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12903 /* Space for saved VFP registers. */
12904 if (TARGET_HARD_FLOAT && TARGET_VFP)
12905 saved += arm_get_vfp_saved_size ();
12908 else /* TARGET_THUMB1 */
12910 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12911 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12912 saved = core_saved;
12913 if (TARGET_BACKTRACE)
12917 /* Saved registers include the stack frame. */
12918 offsets->saved_regs = offsets->saved_args + saved +
12919 arm_compute_static_chain_stack_bytes();
12920 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12921 /* A leaf function does not need any stack alignment if it has nothing
12923 if (leaf && frame_size == 0)
12925 offsets->outgoing_args = offsets->soft_frame;
12926 offsets->locals_base = offsets->soft_frame;
12930 /* Ensure SFP has the correct alignment. */
12931 if (ARM_DOUBLEWORD_ALIGN
12932 && (offsets->soft_frame & 7))
12934 offsets->soft_frame += 4;
12935 /* Try to align stack by pushing an extra reg. Don't bother doing this
12936 when there is a stack frame as the alignment will be rolled into
12937 the normal stack adjustment. */
12938 if (frame_size + crtl->outgoing_args_size == 0)
12942 /* If it is safe to use r3, then do so. This sometimes
12943 generates better code on Thumb-2 by avoiding the need to
12944 use 32-bit push/pop instructions. */
12945 if (!crtl->tail_call_emit
12946 && arm_size_return_regs () <= 12)
12951 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12953 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12962 offsets->saved_regs += 4;
12963 offsets->saved_regs_mask |= (1 << reg);
12968 offsets->locals_base = offsets->soft_frame + frame_size;
12969 offsets->outgoing_args = (offsets->locals_base
12970 + crtl->outgoing_args_size);
12972 if (ARM_DOUBLEWORD_ALIGN)
12974 /* Ensure SP remains doubleword aligned. */
12975 if (offsets->outgoing_args & 7)
12976 offsets->outgoing_args += 4;
12977 gcc_assert (!(offsets->outgoing_args & 7));
12984 /* Calculate the relative offsets for the different stack pointers. Positive
12985 offsets are in the direction of stack growth. */
12988 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12990 arm_stack_offsets *offsets;
12992 offsets = arm_get_frame_offsets ();
12994 /* OK, now we have enough information to compute the distances.
12995 There must be an entry in these switch tables for each pair
12996 of registers in ELIMINABLE_REGS, even if some of the entries
12997 seem to be redundant or useless. */
13000 case ARG_POINTER_REGNUM:
13003 case THUMB_HARD_FRAME_POINTER_REGNUM:
13006 case FRAME_POINTER_REGNUM:
13007 /* This is the reverse of the soft frame pointer
13008 to hard frame pointer elimination below. */
13009 return offsets->soft_frame - offsets->saved_args;
13011 case ARM_HARD_FRAME_POINTER_REGNUM:
13012 /* This is only non-zero in the case where the static chain register
13013 is stored above the frame. */
13014 return offsets->frame - offsets->saved_args - 4;
13016 case STACK_POINTER_REGNUM:
13017 /* If nothing has been pushed on the stack at all
13018 then this will return -4. This *is* correct! */
13019 return offsets->outgoing_args - (offsets->saved_args + 4);
13022 gcc_unreachable ();
13024 gcc_unreachable ();
13026 case FRAME_POINTER_REGNUM:
13029 case THUMB_HARD_FRAME_POINTER_REGNUM:
13032 case ARM_HARD_FRAME_POINTER_REGNUM:
13033 /* The hard frame pointer points to the top entry in the
13034 stack frame. The soft frame pointer to the bottom entry
13035 in the stack frame. If there is no stack frame at all,
13036 then they are identical. */
13038 return offsets->frame - offsets->soft_frame;
13040 case STACK_POINTER_REGNUM:
13041 return offsets->outgoing_args - offsets->soft_frame;
13044 gcc_unreachable ();
13046 gcc_unreachable ();
13049 /* You cannot eliminate from the stack pointer.
13050 In theory you could eliminate from the hard frame
13051 pointer to the stack pointer, but this will never
13052 happen, since if a stack frame is not needed the
13053 hard frame pointer will never be used. */
13054 gcc_unreachable ();
13059 /* Emit RTL to save coprocessor registers on function entry. Returns the
13060 number of bytes pushed. */
13063 arm_save_coproc_regs(void)
13065 int saved_size = 0;
13067 unsigned start_reg;
13070 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13071 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13073 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
13074 insn = gen_rtx_MEM (V2SImode, insn);
13075 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
13076 RTX_FRAME_RELATED_P (insn) = 1;
13080 /* Save any floating point call-saved registers used by this
13082 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
13084 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13085 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13087 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
13088 insn = gen_rtx_MEM (XFmode, insn);
13089 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
13090 RTX_FRAME_RELATED_P (insn) = 1;
13096 start_reg = LAST_FPA_REGNUM;
13098 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13100 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13102 if (start_reg - reg == 3)
13104 insn = emit_sfm (reg, 4);
13105 RTX_FRAME_RELATED_P (insn) = 1;
13107 start_reg = reg - 1;
13112 if (start_reg != reg)
13114 insn = emit_sfm (reg + 1, start_reg - reg);
13115 RTX_FRAME_RELATED_P (insn) = 1;
13116 saved_size += (start_reg - reg) * 12;
13118 start_reg = reg - 1;
13122 if (start_reg != reg)
13124 insn = emit_sfm (reg + 1, start_reg - reg);
13125 saved_size += (start_reg - reg) * 12;
13126 RTX_FRAME_RELATED_P (insn) = 1;
13129 if (TARGET_HARD_FLOAT && TARGET_VFP)
13131 start_reg = FIRST_VFP_REGNUM;
13133 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13135 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13136 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13138 if (start_reg != reg)
13139 saved_size += vfp_emit_fstmd (start_reg,
13140 (reg - start_reg) / 2);
13141 start_reg = reg + 2;
13144 if (start_reg != reg)
13145 saved_size += vfp_emit_fstmd (start_reg,
13146 (reg - start_reg) / 2);
13152 /* Set the Thumb frame pointer from the stack pointer. */
13155 thumb_set_frame_pointer (arm_stack_offsets *offsets)
13157 HOST_WIDE_INT amount;
13160 amount = offsets->outgoing_args - offsets->locals_base;
13162 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13163 stack_pointer_rtx, GEN_INT (amount)));
13166 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
13167 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
13168 expects the first two operands to be the same. */
13171 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13173 hard_frame_pointer_rtx));
13177 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13178 hard_frame_pointer_rtx,
13179 stack_pointer_rtx));
13181 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
13182 plus_constant (stack_pointer_rtx, amount));
13183 RTX_FRAME_RELATED_P (dwarf) = 1;
13184 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13187 RTX_FRAME_RELATED_P (insn) = 1;
13190 /* Generate the prologue instructions for entry into an ARM or Thumb-2
13193 arm_expand_prologue (void)
13198 unsigned long live_regs_mask;
13199 unsigned long func_type;
13201 int saved_pretend_args = 0;
13202 int saved_regs = 0;
13203 unsigned HOST_WIDE_INT args_to_push;
13204 arm_stack_offsets *offsets;
13206 func_type = arm_current_func_type ();
13208 /* Naked functions don't have prologues. */
13209 if (IS_NAKED (func_type))
13212 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
13213 args_to_push = crtl->args.pretend_args_size;
13215 /* Compute which register we will have to save onto the stack. */
13216 offsets = arm_get_frame_offsets ();
13217 live_regs_mask = offsets->saved_regs_mask;
13219 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
13221 if (IS_STACKALIGN (func_type))
13226 /* Handle a word-aligned stack pointer. We generate the following:
13231 <save and restore r0 in normal prologue/epilogue>
13235 The unwinder doesn't need to know about the stack realignment.
13236 Just tell it we saved SP in r0. */
13237 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
13239 r0 = gen_rtx_REG (SImode, 0);
13240 r1 = gen_rtx_REG (SImode, 1);
13241 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
13242 compiler won't choke. */
13243 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
13244 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
13245 insn = gen_movsi (r0, stack_pointer_rtx);
13246 RTX_FRAME_RELATED_P (insn) = 1;
13247 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13249 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
13250 emit_insn (gen_movsi (stack_pointer_rtx, r1));
13253 /* For APCS frames, if IP register is clobbered
13254 when creating frame, save that register in a special
13256 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13258 if (IS_INTERRUPT (func_type))
13260 /* Interrupt functions must not corrupt any registers.
13261 Creating a frame pointer however, corrupts the IP
13262 register, so we must push it first. */
13263 insn = emit_multi_reg_push (1 << IP_REGNUM);
13265 /* Do not set RTX_FRAME_RELATED_P on this insn.
13266 The dwarf stack unwinding code only wants to see one
13267 stack decrement per function, and this is not it. If
13268 this instruction is labeled as being part of the frame
13269 creation sequence then dwarf2out_frame_debug_expr will
13270 die when it encounters the assignment of IP to FP
13271 later on, since the use of SP here establishes SP as
13272 the CFA register and not IP.
13274 Anyway this instruction is not really part of the stack
13275 frame creation although it is part of the prologue. */
13277 else if (IS_NESTED (func_type))
13279 /* The Static chain register is the same as the IP register
13280 used as a scratch register during stack frame creation.
13281 To get around this need to find somewhere to store IP
13282 whilst the frame is being created. We try the following
13285 1. The last argument register.
13286 2. A slot on the stack above the frame. (This only
13287 works if the function is not a varargs function).
13288 3. Register r3, after pushing the argument registers
13291 Note - we only need to tell the dwarf2 backend about the SP
13292 adjustment in the second variant; the static chain register
13293 doesn't need to be unwound, as it doesn't contain a value
13294 inherited from the caller. */
13296 if (df_regs_ever_live_p (3) == false)
13297 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13298 else if (args_to_push == 0)
13302 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
13305 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
13306 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
13309 /* Just tell the dwarf backend that we adjusted SP. */
13310 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13311 plus_constant (stack_pointer_rtx,
13313 RTX_FRAME_RELATED_P (insn) = 1;
13314 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
13318 /* Store the args on the stack. */
13319 if (cfun->machine->uses_anonymous_args)
13320 insn = emit_multi_reg_push
13321 ((0xf0 >> (args_to_push / 4)) & 0xf);
13324 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13325 GEN_INT (- args_to_push)));
13327 RTX_FRAME_RELATED_P (insn) = 1;
13329 saved_pretend_args = 1;
13330 fp_offset = args_to_push;
13333 /* Now reuse r3 to preserve IP. */
13334 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
13338 insn = emit_set_insn (ip_rtx,
13339 plus_constant (stack_pointer_rtx, fp_offset));
13340 RTX_FRAME_RELATED_P (insn) = 1;
13345 /* Push the argument registers, or reserve space for them. */
13346 if (cfun->machine->uses_anonymous_args)
13347 insn = emit_multi_reg_push
13348 ((0xf0 >> (args_to_push / 4)) & 0xf);
13351 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13352 GEN_INT (- args_to_push)));
13353 RTX_FRAME_RELATED_P (insn) = 1;
13356 /* If this is an interrupt service routine, and the link register
13357 is going to be pushed, and we're not generating extra
13358 push of IP (needed when frame is needed and frame layout if apcs),
13359 subtracting four from LR now will mean that the function return
13360 can be done with a single instruction. */
13361 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
13362 && (live_regs_mask & (1 << LR_REGNUM)) != 0
13363 && !(frame_pointer_needed && TARGET_APCS_FRAME)
13366 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
13368 emit_set_insn (lr, plus_constant (lr, -4));
13371 if (live_regs_mask)
13373 saved_regs += bit_count (live_regs_mask) * 4;
13374 if (optimize_size && !frame_pointer_needed
13375 && saved_regs == offsets->saved_regs - offsets->saved_args)
13377 /* If no coprocessor registers are being pushed and we don't have
13378 to worry about a frame pointer then push extra registers to
13379 create the stack frame. This is done is a way that does not
13380 alter the frame layout, so is independent of the epilogue. */
13384 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
13386 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
13387 if (frame && n * 4 >= frame)
13390 live_regs_mask |= (1 << n) - 1;
13391 saved_regs += frame;
13394 insn = emit_multi_reg_push (live_regs_mask);
13395 RTX_FRAME_RELATED_P (insn) = 1;
13398 if (! IS_VOLATILE (func_type))
13399 saved_regs += arm_save_coproc_regs ();
13401 if (frame_pointer_needed && TARGET_ARM)
13403 /* Create the new frame pointer. */
13404 if (TARGET_APCS_FRAME)
13406 insn = GEN_INT (-(4 + args_to_push + fp_offset));
13407 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
13408 RTX_FRAME_RELATED_P (insn) = 1;
13410 if (IS_NESTED (func_type))
13412 /* Recover the static chain register. */
13413 if (!df_regs_ever_live_p (3)
13414 || saved_pretend_args)
13415 insn = gen_rtx_REG (SImode, 3);
13416 else /* if (crtl->args.pretend_args_size == 0) */
13418 insn = plus_constant (hard_frame_pointer_rtx, 4);
13419 insn = gen_frame_mem (SImode, insn);
13421 emit_set_insn (ip_rtx, insn);
13422 /* Add a USE to stop propagate_one_insn() from barfing. */
13423 emit_insn (gen_prologue_use (ip_rtx));
13428 insn = GEN_INT (saved_regs - 4);
13429 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
13430 stack_pointer_rtx, insn));
13431 RTX_FRAME_RELATED_P (insn) = 1;
13435 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
13437 /* This add can produce multiple insns for a large constant, so we
13438 need to get tricky. */
13439 rtx last = get_last_insn ();
13441 amount = GEN_INT (offsets->saved_args + saved_regs
13442 - offsets->outgoing_args);
13444 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13448 last = last ? NEXT_INSN (last) : get_insns ();
13449 RTX_FRAME_RELATED_P (last) = 1;
13451 while (last != insn);
13453 /* If the frame pointer is needed, emit a special barrier that
13454 will prevent the scheduler from moving stores to the frame
13455 before the stack adjustment. */
13456 if (frame_pointer_needed)
13457 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
13458 hard_frame_pointer_rtx));
13462 if (frame_pointer_needed && TARGET_THUMB2)
13463 thumb_set_frame_pointer (offsets);
13465 if (flag_pic && arm_pic_register != INVALID_REGNUM)
13467 unsigned long mask;
13469 mask = live_regs_mask;
13470 mask &= THUMB2_WORK_REGS;
13471 if (!IS_NESTED (func_type))
13472 mask |= (1 << IP_REGNUM);
13473 arm_load_pic_register (mask);
13476 /* If we are profiling, make sure no instructions are scheduled before
13477 the call to mcount. Similarly if the user has requested no
13478 scheduling in the prolog. Similarly if we want non-call exceptions
13479 using the EABI unwinder, to prevent faulting instructions from being
13480 swapped with a stack adjustment. */
13481 if (crtl->profile || !TARGET_SCHED_PROLOG
13482 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
13483 emit_insn (gen_blockage ());
13485 /* If the link register is being kept alive, with the return address in it,
13486 then make sure that it does not get reused by the ce2 pass. */
13487 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
13488 cfun->machine->lr_save_eliminated = 1;
13491 /* Print condition code to STREAM. Helper function for arm_print_operand. */
13493 arm_print_condition (FILE *stream)
13495 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
13497 /* Branch conversion is not implemented for Thumb-2. */
13500 output_operand_lossage ("predicated Thumb instruction");
13503 if (current_insn_predicate != NULL)
13505 output_operand_lossage
13506 ("predicated instruction in conditional sequence");
13510 fputs (arm_condition_codes[arm_current_cc], stream);
13512 else if (current_insn_predicate)
13514 enum arm_cond_code code;
13518 output_operand_lossage ("predicated Thumb instruction");
13522 code = get_arm_condition_code (current_insn_predicate);
13523 fputs (arm_condition_codes[code], stream);
13528 /* If CODE is 'd', then the X is a condition operand and the instruction
13529 should only be executed if the condition is true.
13530 if CODE is 'D', then the X is a condition operand and the instruction
13531 should only be executed if the condition is false: however, if the mode
13532 of the comparison is CCFPEmode, then always execute the instruction -- we
13533 do this because in these circumstances !GE does not necessarily imply LT;
13534 in these cases the instruction pattern will take care to make sure that
13535 an instruction containing %d will follow, thereby undoing the effects of
13536 doing this instruction unconditionally.
13537 If CODE is 'N' then X is a floating point operand that must be negated
13539 If CODE is 'B' then output a bitwise inverted value of X (a const int).
13540 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
13542 arm_print_operand (FILE *stream, rtx x, int code)
13547 fputs (ASM_COMMENT_START, stream);
13551 fputs (user_label_prefix, stream);
13555 fputs (REGISTER_PREFIX, stream);
13559 arm_print_condition (stream);
13563 /* Nothing in unified syntax, otherwise the current condition code. */
13564 if (!TARGET_UNIFIED_ASM)
13565 arm_print_condition (stream);
13569 /* The current condition code in unified syntax, otherwise nothing. */
13570 if (TARGET_UNIFIED_ASM)
13571 arm_print_condition (stream);
13575 /* The current condition code for a condition code setting instruction.
13576 Preceded by 's' in unified syntax, otherwise followed by 's'. */
13577 if (TARGET_UNIFIED_ASM)
13579 fputc('s', stream);
13580 arm_print_condition (stream);
13584 arm_print_condition (stream);
13585 fputc('s', stream);
13590 /* If the instruction is conditionally executed then print
13591 the current condition code, otherwise print 's'. */
13592 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
13593 if (current_insn_predicate)
13594 arm_print_condition (stream);
13596 fputc('s', stream);
13599 /* %# is a "break" sequence. It doesn't output anything, but is used to
13600 separate e.g. operand numbers from following text, if that text consists
13601 of further digits which we don't want to be part of the operand
13609 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13610 r = REAL_VALUE_NEGATE (r);
13611 fprintf (stream, "%s", fp_const_from_val (&r));
13615 /* An integer or symbol address without a preceding # sign. */
13617 switch (GET_CODE (x))
13620 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13624 output_addr_const (stream, x);
13628 gcc_unreachable ();
13633 if (GET_CODE (x) == CONST_INT)
13636 val = ARM_SIGN_EXTEND (~INTVAL (x));
13637 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
13641 putc ('~', stream);
13642 output_addr_const (stream, x);
13647 /* The low 16 bits of an immediate constant. */
13648 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
13652 fprintf (stream, "%s", arithmetic_instr (x, 1));
13655 /* Truncate Cirrus shift counts. */
13657 if (GET_CODE (x) == CONST_INT)
13659 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
13662 arm_print_operand (stream, x, 0);
13666 fprintf (stream, "%s", arithmetic_instr (x, 0));
13674 if (!shift_operator (x, SImode))
13676 output_operand_lossage ("invalid shift operand");
13680 shift = shift_op (x, &val);
13684 fprintf (stream, ", %s ", shift);
13686 arm_print_operand (stream, XEXP (x, 1), 0);
13688 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
13693 /* An explanation of the 'Q', 'R' and 'H' register operands:
13695 In a pair of registers containing a DI or DF value the 'Q'
13696 operand returns the register number of the register containing
13697 the least significant part of the value. The 'R' operand returns
13698 the register number of the register containing the most
13699 significant part of the value.
13701 The 'H' operand returns the higher of the two register numbers.
13702 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
13703 same as the 'Q' operand, since the most significant part of the
13704 value is held in the lower number register. The reverse is true
13705 on systems where WORDS_BIG_ENDIAN is false.
13707 The purpose of these operands is to distinguish between cases
13708 where the endian-ness of the values is important (for example
13709 when they are added together), and cases where the endian-ness
13710 is irrelevant, but the order of register operations is important.
13711 For example when loading a value from memory into a register
13712 pair, the endian-ness does not matter. Provided that the value
13713 from the lower memory address is put into the lower numbered
13714 register, and the value from the higher address is put into the
13715 higher numbered register, the load will work regardless of whether
13716 the value being loaded is big-wordian or little-wordian. The
13717 order of the two register loads can matter however, if the address
13718 of the memory location is actually held in one of the registers
13719 being overwritten by the load. */
13721 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13723 output_operand_lossage ("invalid operand for code '%c'", code);
13727 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
13731 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13733 output_operand_lossage ("invalid operand for code '%c'", code);
13737 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
13741 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13743 output_operand_lossage ("invalid operand for code '%c'", code);
13747 asm_fprintf (stream, "%r", REGNO (x) + 1);
13751 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13753 output_operand_lossage ("invalid operand for code '%c'", code);
13757 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
13761 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
13763 output_operand_lossage ("invalid operand for code '%c'", code);
13767 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
13771 asm_fprintf (stream, "%r",
13772 GET_CODE (XEXP (x, 0)) == REG
13773 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
13777 asm_fprintf (stream, "{%r-%r}",
13779 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
13782 /* Like 'M', but writing doubleword vector registers, for use by Neon
13786 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
13787 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
13789 asm_fprintf (stream, "{d%d}", regno);
13791 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
13796 /* CONST_TRUE_RTX means always -- that's the default. */
13797 if (x == const_true_rtx)
13800 if (!COMPARISON_P (x))
13802 output_operand_lossage ("invalid operand for code '%c'", code);
13806 fputs (arm_condition_codes[get_arm_condition_code (x)],
13811 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13812 want to do that. */
13813 if (x == const_true_rtx)
13815 output_operand_lossage ("instruction never executed");
13818 if (!COMPARISON_P (x))
13820 output_operand_lossage ("invalid operand for code '%c'", code);
13824 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13825 (get_arm_condition_code (x))],
13829 /* Cirrus registers can be accessed in a variety of ways:
13830 single floating point (f)
13831 double floating point (d)
13833 64bit integer (dx). */
13834 case 'W': /* Cirrus register in F mode. */
13835 case 'X': /* Cirrus register in D mode. */
13836 case 'Y': /* Cirrus register in FX mode. */
13837 case 'Z': /* Cirrus register in DX mode. */
13838 gcc_assert (GET_CODE (x) == REG
13839 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13841 fprintf (stream, "mv%s%s",
13843 : code == 'X' ? "d"
13844 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13848 /* Print cirrus register in the mode specified by the register's mode. */
13851 int mode = GET_MODE (x);
13853 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13855 output_operand_lossage ("invalid operand for code '%c'", code);
13859 fprintf (stream, "mv%s%s",
13860 mode == DFmode ? "d"
13861 : mode == SImode ? "fx"
13862 : mode == DImode ? "dx"
13863 : "f", reg_names[REGNO (x)] + 2);
13869 if (GET_CODE (x) != REG
13870 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13871 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13872 /* Bad value for wCG register number. */
13874 output_operand_lossage ("invalid operand for code '%c'", code);
13879 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13882 /* Print an iWMMXt control register name. */
13884 if (GET_CODE (x) != CONST_INT
13886 || INTVAL (x) >= 16)
13887 /* Bad value for wC register number. */
13889 output_operand_lossage ("invalid operand for code '%c'", code);
13895 static const char * wc_reg_names [16] =
13897 "wCID", "wCon", "wCSSF", "wCASF",
13898 "wC4", "wC5", "wC6", "wC7",
13899 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13900 "wC12", "wC13", "wC14", "wC15"
13903 fprintf (stream, wc_reg_names [INTVAL (x)]);
13907 /* Print a VFP/Neon double precision or quad precision register name. */
13911 int mode = GET_MODE (x);
13912 int is_quad = (code == 'q');
13915 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13917 output_operand_lossage ("invalid operand for code '%c'", code);
13921 if (GET_CODE (x) != REG
13922 || !IS_VFP_REGNUM (REGNO (x)))
13924 output_operand_lossage ("invalid operand for code '%c'", code);
13929 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13930 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13932 output_operand_lossage ("invalid operand for code '%c'", code);
13936 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13937 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13941 /* These two codes print the low/high doubleword register of a Neon quad
13942 register, respectively. For pair-structure types, can also print
13943 low/high quadword registers. */
13947 int mode = GET_MODE (x);
13950 if ((GET_MODE_SIZE (mode) != 16
13951 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13953 output_operand_lossage ("invalid operand for code '%c'", code);
13958 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13960 output_operand_lossage ("invalid operand for code '%c'", code);
13964 if (GET_MODE_SIZE (mode) == 16)
13965 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13966 + (code == 'f' ? 1 : 0));
13968 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13969 + (code == 'f' ? 1 : 0));
13973 /* Print a VFPv3 floating-point constant, represented as an integer
13977 int index = vfp3_const_double_index (x);
13978 gcc_assert (index != -1);
13979 fprintf (stream, "%d", index);
13983 /* Print bits representing opcode features for Neon.
13985 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13986 and polynomials as unsigned.
13988 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13990 Bit 2 is 1 for rounding functions, 0 otherwise. */
13992 /* Identify the type as 's', 'u', 'p' or 'f'. */
13995 HOST_WIDE_INT bits = INTVAL (x);
13996 fputc ("uspf"[bits & 3], stream);
14000 /* Likewise, but signed and unsigned integers are both 'i'. */
14003 HOST_WIDE_INT bits = INTVAL (x);
14004 fputc ("iipf"[bits & 3], stream);
14008 /* As for 'T', but emit 'u' instead of 'p'. */
14011 HOST_WIDE_INT bits = INTVAL (x);
14012 fputc ("usuf"[bits & 3], stream);
14016 /* Bit 2: rounding (vs none). */
14019 HOST_WIDE_INT bits = INTVAL (x);
14020 fputs ((bits & 4) != 0 ? "r" : "", stream);
14024 /* Memory operand for vld1/vst1 instruction. */
14028 bool postinc = FALSE;
14029 gcc_assert (GET_CODE (x) == MEM);
14030 addr = XEXP (x, 0);
14031 if (GET_CODE (addr) == POST_INC)
14034 addr = XEXP (addr, 0);
14036 asm_fprintf (stream, "[%r]", REGNO (addr));
14038 fputs("!", stream);
14042 /* Register specifier for vld1.16/vst1.16. Translate the S register
14043 number into a D register number and element index. */
14046 int mode = GET_MODE (x);
14049 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
14051 output_operand_lossage ("invalid operand for code '%c'", code);
14056 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
14058 output_operand_lossage ("invalid operand for code '%c'", code);
14062 regno = regno - FIRST_VFP_REGNUM;
14063 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
14070 output_operand_lossage ("missing operand");
14074 switch (GET_CODE (x))
14077 asm_fprintf (stream, "%r", REGNO (x));
14081 output_memory_reference_mode = GET_MODE (x);
14082 output_address (XEXP (x, 0));
14089 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
14090 sizeof (fpstr), 0, 1);
14091 fprintf (stream, "#%s", fpstr);
14094 fprintf (stream, "#%s", fp_immediate_constant (x));
14098 gcc_assert (GET_CODE (x) != NEG);
14099 fputc ('#', stream);
14100 if (GET_CODE (x) == HIGH)
14102 fputs (":lower16:", stream);
14106 output_addr_const (stream, x);
14112 /* Target hook for assembling integer objects. The ARM version needs to
14113 handle word-sized values specially. */
14115 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
14117 enum machine_mode mode;
14119 if (size == UNITS_PER_WORD && aligned_p)
14121 fputs ("\t.word\t", asm_out_file);
14122 output_addr_const (asm_out_file, x);
14124 /* Mark symbols as position independent. We only do this in the
14125 .text segment, not in the .data segment. */
14126 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
14127 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
14129 /* See legitimize_pic_address for an explanation of the
14130 TARGET_VXWORKS_RTP check. */
14131 if (TARGET_VXWORKS_RTP
14132 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
14133 fputs ("(GOT)", asm_out_file);
14135 fputs ("(GOTOFF)", asm_out_file);
14137 fputc ('\n', asm_out_file);
14141 mode = GET_MODE (x);
14143 if (arm_vector_mode_supported_p (mode))
14147 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14149 units = CONST_VECTOR_NUNITS (x);
14150 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
14152 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14153 for (i = 0; i < units; i++)
14155 rtx elt = CONST_VECTOR_ELT (x, i);
14157 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
14160 for (i = 0; i < units; i++)
14162 rtx elt = CONST_VECTOR_ELT (x, i);
14163 REAL_VALUE_TYPE rval;
14165 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
14168 (rval, GET_MODE_INNER (mode),
14169 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
14175 return default_assemble_integer (x, size, aligned_p);
14179 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
14183 if (!TARGET_AAPCS_BASED)
14186 default_named_section_asm_out_constructor
14187 : default_named_section_asm_out_destructor) (symbol, priority);
14191 /* Put these in the .init_array section, using a special relocation. */
14192 if (priority != DEFAULT_INIT_PRIORITY)
14195 sprintf (buf, "%s.%.5u",
14196 is_ctor ? ".init_array" : ".fini_array",
14198 s = get_section (buf, SECTION_WRITE, NULL_TREE);
14205 switch_to_section (s);
14206 assemble_align (POINTER_SIZE);
14207 fputs ("\t.word\t", asm_out_file);
14208 output_addr_const (asm_out_file, symbol);
14209 fputs ("(target1)\n", asm_out_file);
14212 /* Add a function to the list of static constructors. */
14215 arm_elf_asm_constructor (rtx symbol, int priority)
14217 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
14220 /* Add a function to the list of static destructors. */
14223 arm_elf_asm_destructor (rtx symbol, int priority)
14225 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
14228 /* A finite state machine takes care of noticing whether or not instructions
14229 can be conditionally executed, and thus decrease execution time and code
14230 size by deleting branch instructions. The fsm is controlled by
14231 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
14233 /* The state of the fsm controlling condition codes are:
14234 0: normal, do nothing special
14235 1: make ASM_OUTPUT_OPCODE not output this instruction
14236 2: make ASM_OUTPUT_OPCODE not output this instruction
14237 3: make instructions conditional
14238 4: make instructions conditional
14240 State transitions (state->state by whom under condition):
14241 0 -> 1 final_prescan_insn if the `target' is a label
14242 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
14243 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
14244 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
14245 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
14246 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
14247 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
14248 (the target insn is arm_target_insn).
14250 If the jump clobbers the conditions then we use states 2 and 4.
14252 A similar thing can be done with conditional return insns.
14254 XXX In case the `target' is an unconditional branch, this conditionalising
14255 of the instructions always reduces code size, but not always execution
14256 time. But then, I want to reduce the code size to somewhere near what
14257 /bin/cc produces. */
14259 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
14260 instructions. When a COND_EXEC instruction is seen the subsequent
14261 instructions are scanned so that multiple conditional instructions can be
14262 combined into a single IT block. arm_condexec_count and arm_condexec_mask
14263 specify the length and true/false mask for the IT block. These will be
14264 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
14266 /* Returns the index of the ARM condition code string in
14267 `arm_condition_codes'. COMPARISON should be an rtx like
14268 `(eq (...) (...))'. */
14269 static enum arm_cond_code
14270 get_arm_condition_code (rtx comparison)
14272 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
14273 enum arm_cond_code code;
14274 enum rtx_code comp_code = GET_CODE (comparison);
14276 if (GET_MODE_CLASS (mode) != MODE_CC)
14277 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
14278 XEXP (comparison, 1));
14282 case CC_DNEmode: code = ARM_NE; goto dominance;
14283 case CC_DEQmode: code = ARM_EQ; goto dominance;
14284 case CC_DGEmode: code = ARM_GE; goto dominance;
14285 case CC_DGTmode: code = ARM_GT; goto dominance;
14286 case CC_DLEmode: code = ARM_LE; goto dominance;
14287 case CC_DLTmode: code = ARM_LT; goto dominance;
14288 case CC_DGEUmode: code = ARM_CS; goto dominance;
14289 case CC_DGTUmode: code = ARM_HI; goto dominance;
14290 case CC_DLEUmode: code = ARM_LS; goto dominance;
14291 case CC_DLTUmode: code = ARM_CC;
14294 gcc_assert (comp_code == EQ || comp_code == NE);
14296 if (comp_code == EQ)
14297 return ARM_INVERSE_CONDITION_CODE (code);
14303 case NE: return ARM_NE;
14304 case EQ: return ARM_EQ;
14305 case GE: return ARM_PL;
14306 case LT: return ARM_MI;
14307 default: gcc_unreachable ();
14313 case NE: return ARM_NE;
14314 case EQ: return ARM_EQ;
14315 default: gcc_unreachable ();
14321 case NE: return ARM_MI;
14322 case EQ: return ARM_PL;
14323 default: gcc_unreachable ();
14328 /* These encodings assume that AC=1 in the FPA system control
14329 byte. This allows us to handle all cases except UNEQ and
14333 case GE: return ARM_GE;
14334 case GT: return ARM_GT;
14335 case LE: return ARM_LS;
14336 case LT: return ARM_MI;
14337 case NE: return ARM_NE;
14338 case EQ: return ARM_EQ;
14339 case ORDERED: return ARM_VC;
14340 case UNORDERED: return ARM_VS;
14341 case UNLT: return ARM_LT;
14342 case UNLE: return ARM_LE;
14343 case UNGT: return ARM_HI;
14344 case UNGE: return ARM_PL;
14345 /* UNEQ and LTGT do not have a representation. */
14346 case UNEQ: /* Fall through. */
14347 case LTGT: /* Fall through. */
14348 default: gcc_unreachable ();
14354 case NE: return ARM_NE;
14355 case EQ: return ARM_EQ;
14356 case GE: return ARM_LE;
14357 case GT: return ARM_LT;
14358 case LE: return ARM_GE;
14359 case LT: return ARM_GT;
14360 case GEU: return ARM_LS;
14361 case GTU: return ARM_CC;
14362 case LEU: return ARM_CS;
14363 case LTU: return ARM_HI;
14364 default: gcc_unreachable ();
14370 case LTU: return ARM_CS;
14371 case GEU: return ARM_CC;
14372 default: gcc_unreachable ();
14378 case NE: return ARM_NE;
14379 case EQ: return ARM_EQ;
14380 case GE: return ARM_GE;
14381 case GT: return ARM_GT;
14382 case LE: return ARM_LE;
14383 case LT: return ARM_LT;
14384 case GEU: return ARM_CS;
14385 case GTU: return ARM_HI;
14386 case LEU: return ARM_LS;
14387 case LTU: return ARM_CC;
14388 default: gcc_unreachable ();
14391 default: gcc_unreachable ();
14395 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
14398 thumb2_final_prescan_insn (rtx insn)
14400 rtx first_insn = insn;
14401 rtx body = PATTERN (insn);
14403 enum arm_cond_code code;
14407 /* Remove the previous insn from the count of insns to be output. */
14408 if (arm_condexec_count)
14409 arm_condexec_count--;
14411 /* Nothing to do if we are already inside a conditional block. */
14412 if (arm_condexec_count)
14415 if (GET_CODE (body) != COND_EXEC)
14418 /* Conditional jumps are implemented directly. */
14419 if (GET_CODE (insn) == JUMP_INSN)
14422 predicate = COND_EXEC_TEST (body);
14423 arm_current_cc = get_arm_condition_code (predicate);
14425 n = get_attr_ce_count (insn);
14426 arm_condexec_count = 1;
14427 arm_condexec_mask = (1 << n) - 1;
14428 arm_condexec_masklen = n;
14429 /* See if subsequent instructions can be combined into the same block. */
14432 insn = next_nonnote_insn (insn);
14434 /* Jumping into the middle of an IT block is illegal, so a label or
14435 barrier terminates the block. */
14436 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
14439 body = PATTERN (insn);
14440 /* USE and CLOBBER aren't really insns, so just skip them. */
14441 if (GET_CODE (body) == USE
14442 || GET_CODE (body) == CLOBBER)
14445 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
14446 if (GET_CODE (body) != COND_EXEC)
14448 /* Allow up to 4 conditionally executed instructions in a block. */
14449 n = get_attr_ce_count (insn);
14450 if (arm_condexec_masklen + n > 4)
14453 predicate = COND_EXEC_TEST (body);
14454 code = get_arm_condition_code (predicate);
14455 mask = (1 << n) - 1;
14456 if (arm_current_cc == code)
14457 arm_condexec_mask |= (mask << arm_condexec_masklen);
14458 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
14461 arm_condexec_count++;
14462 arm_condexec_masklen += n;
14464 /* A jump must be the last instruction in a conditional block. */
14465 if (GET_CODE(insn) == JUMP_INSN)
14468 /* Restore recog_data (getting the attributes of other insns can
14469 destroy this array, but final.c assumes that it remains intact
14470 across this call). */
14471 extract_constrain_insn_cached (first_insn);
14475 arm_final_prescan_insn (rtx insn)
14477 /* BODY will hold the body of INSN. */
14478 rtx body = PATTERN (insn);
14480 /* This will be 1 if trying to repeat the trick, and things need to be
14481 reversed if it appears to fail. */
14484 /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
14485 taken are clobbered, even if the rtl suggests otherwise. It also
14486 means that we have to grub around within the jump expression to find
14487 out what the conditions are when the jump isn't taken. */
14488 int jump_clobbers = 0;
14490 /* If we start with a return insn, we only succeed if we find another one. */
14491 int seeking_return = 0;
14493 /* START_INSN will hold the insn from where we start looking. This is the
14494 first insn after the following code_label if REVERSE is true. */
14495 rtx start_insn = insn;
14497 /* If in state 4, check if the target branch is reached, in order to
14498 change back to state 0. */
14499 if (arm_ccfsm_state == 4)
14501 if (insn == arm_target_insn)
14503 arm_target_insn = NULL;
14504 arm_ccfsm_state = 0;
14509 /* If in state 3, it is possible to repeat the trick, if this insn is an
14510 unconditional branch to a label, and immediately following this branch
14511 is the previous target label which is only used once, and the label this
14512 branch jumps to is not too far off. */
14513 if (arm_ccfsm_state == 3)
14515 if (simplejump_p (insn))
14517 start_insn = next_nonnote_insn (start_insn);
14518 if (GET_CODE (start_insn) == BARRIER)
14520 /* XXX Isn't this always a barrier? */
14521 start_insn = next_nonnote_insn (start_insn);
14523 if (GET_CODE (start_insn) == CODE_LABEL
14524 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14525 && LABEL_NUSES (start_insn) == 1)
14530 else if (GET_CODE (body) == RETURN)
14532 start_insn = next_nonnote_insn (start_insn);
14533 if (GET_CODE (start_insn) == BARRIER)
14534 start_insn = next_nonnote_insn (start_insn);
14535 if (GET_CODE (start_insn) == CODE_LABEL
14536 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
14537 && LABEL_NUSES (start_insn) == 1)
14540 seeking_return = 1;
14549 gcc_assert (!arm_ccfsm_state || reverse);
14550 if (GET_CODE (insn) != JUMP_INSN)
14553 /* This jump might be paralleled with a clobber of the condition codes
14554 the jump should always come first */
14555 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
14556 body = XVECEXP (body, 0, 0);
14559 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
14560 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
14563 int fail = FALSE, succeed = FALSE;
14564 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
14565 int then_not_else = TRUE;
14566 rtx this_insn = start_insn, label = 0;
14568 /* If the jump cannot be done with one instruction, we cannot
14569 conditionally execute the instruction in the inverse case. */
14570 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
14576 /* Register the insn jumped to. */
14579 if (!seeking_return)
14580 label = XEXP (SET_SRC (body), 0);
14582 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
14583 label = XEXP (XEXP (SET_SRC (body), 1), 0);
14584 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
14586 label = XEXP (XEXP (SET_SRC (body), 2), 0);
14587 then_not_else = FALSE;
14589 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
14590 seeking_return = 1;
14591 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
14593 seeking_return = 1;
14594 then_not_else = FALSE;
14597 gcc_unreachable ();
14599 /* See how many insns this branch skips, and what kind of insns. If all
14600 insns are okay, and the label or unconditional branch to the same
14601 label is not too far away, succeed. */
14602 for (insns_skipped = 0;
14603 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
14607 this_insn = next_nonnote_insn (this_insn);
14611 switch (GET_CODE (this_insn))
14614 /* Succeed if it is the target label, otherwise fail since
14615 control falls in from somewhere else. */
14616 if (this_insn == label)
14620 arm_ccfsm_state = 2;
14621 this_insn = next_nonnote_insn (this_insn);
14624 arm_ccfsm_state = 1;
14632 /* Succeed if the following insn is the target label.
14634 If return insns are used then the last insn in a function
14635 will be a barrier. */
14636 this_insn = next_nonnote_insn (this_insn);
14637 if (this_insn && this_insn == label)
14641 arm_ccfsm_state = 2;
14642 this_insn = next_nonnote_insn (this_insn);
14645 arm_ccfsm_state = 1;
14653 /* The AAPCS says that conditional calls should not be
14654 used since they make interworking inefficient (the
14655 linker can't transform BL<cond> into BLX). That's
14656 only a problem if the machine has BLX. */
14663 /* Succeed if the following insn is the target label, or
14664 if the following two insns are a barrier and the
14666 this_insn = next_nonnote_insn (this_insn);
14667 if (this_insn && GET_CODE (this_insn) == BARRIER)
14668 this_insn = next_nonnote_insn (this_insn);
14670 if (this_insn && this_insn == label
14671 && insns_skipped < max_insns_skipped)
14675 arm_ccfsm_state = 2;
14676 this_insn = next_nonnote_insn (this_insn);
14679 arm_ccfsm_state = 1;
14687 /* If this is an unconditional branch to the same label, succeed.
14688 If it is to another label, do nothing. If it is conditional,
14690 /* XXX Probably, the tests for SET and the PC are
14693 scanbody = PATTERN (this_insn);
14694 if (GET_CODE (scanbody) == SET
14695 && GET_CODE (SET_DEST (scanbody)) == PC)
14697 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
14698 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
14700 arm_ccfsm_state = 2;
14703 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
14706 /* Fail if a conditional return is undesirable (e.g. on a
14707 StrongARM), but still allow this if optimizing for size. */
14708 else if (GET_CODE (scanbody) == RETURN
14709 && !use_return_insn (TRUE, NULL)
14712 else if (GET_CODE (scanbody) == RETURN
14715 arm_ccfsm_state = 2;
14718 else if (GET_CODE (scanbody) == PARALLEL)
14720 switch (get_attr_conds (this_insn))
14730 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
14735 /* Instructions using or affecting the condition codes make it
14737 scanbody = PATTERN (this_insn);
14738 if (!(GET_CODE (scanbody) == SET
14739 || GET_CODE (scanbody) == PARALLEL)
14740 || get_attr_conds (this_insn) != CONDS_NOCOND)
14743 /* A conditional cirrus instruction must be followed by
14744 a non Cirrus instruction. However, since we
14745 conditionalize instructions in this function and by
14746 the time we get here we can't add instructions
14747 (nops), because shorten_branches() has already been
14748 called, we will disable conditionalizing Cirrus
14749 instructions to be safe. */
14750 if (GET_CODE (scanbody) != USE
14751 && GET_CODE (scanbody) != CLOBBER
14752 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
14762 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
14763 arm_target_label = CODE_LABEL_NUMBER (label);
14766 gcc_assert (seeking_return || arm_ccfsm_state == 2);
14768 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
14770 this_insn = next_nonnote_insn (this_insn);
14771 gcc_assert (!this_insn
14772 || (GET_CODE (this_insn) != BARRIER
14773 && GET_CODE (this_insn) != CODE_LABEL));
14777 /* Oh, dear! we ran off the end.. give up. */
14778 extract_constrain_insn_cached (insn);
14779 arm_ccfsm_state = 0;
14780 arm_target_insn = NULL;
14783 arm_target_insn = this_insn;
14787 gcc_assert (!reverse);
14789 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
14791 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
14792 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14793 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
14794 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14798 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
14801 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
14805 if (reverse || then_not_else)
14806 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
14809 /* Restore recog_data (getting the attributes of other insns can
14810 destroy this array, but final.c assumes that it remains intact
14811 across this call. */
14812 extract_constrain_insn_cached (insn);
14816 /* Output IT instructions. */
14818 thumb2_asm_output_opcode (FILE * stream)
14823 if (arm_condexec_mask)
14825 for (n = 0; n < arm_condexec_masklen; n++)
14826 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
14828 asm_fprintf(stream, "i%s\t%s\n\t", buff,
14829 arm_condition_codes[arm_current_cc]);
14830 arm_condexec_mask = 0;
14834 /* Returns true if REGNO is a valid register
14835 for holding a quantity of type MODE. */
14837 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
14839 if (GET_MODE_CLASS (mode) == MODE_CC)
14840 return (regno == CC_REGNUM
14841 || (TARGET_HARD_FLOAT && TARGET_VFP
14842 && regno == VFPCC_REGNUM));
14845 /* For the Thumb we only allow values bigger than SImode in
14846 registers 0 - 6, so that there is always a second low
14847 register available to hold the upper part of the value.
14848 We probably we ought to ensure that the register is the
14849 start of an even numbered register pair. */
14850 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14852 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14853 && IS_CIRRUS_REGNUM (regno))
14854 /* We have outlawed SI values in Cirrus registers because they
14855 reside in the lower 32 bits, but SF values reside in the
14856 upper 32 bits. This causes gcc all sorts of grief. We can't
14857 even split the registers into pairs because Cirrus SI values
14858 get sign extended to 64bits-- aldyh. */
14859 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14861 if (TARGET_HARD_FLOAT && TARGET_VFP
14862 && IS_VFP_REGNUM (regno))
14864 if (mode == SFmode || mode == SImode)
14865 return VFP_REGNO_OK_FOR_SINGLE (regno);
14867 if (mode == DFmode)
14868 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14870 /* VFP registers can hold HFmode values, but there is no point in
14871 putting them there unless we have the NEON extensions for
14872 loading/storing them, too. */
14873 if (mode == HFmode)
14874 return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
14877 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14878 || (VALID_NEON_QREG_MODE (mode)
14879 && NEON_REGNO_OK_FOR_QUAD (regno))
14880 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14881 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14882 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14883 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14884 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14889 if (TARGET_REALLY_IWMMXT)
14891 if (IS_IWMMXT_GR_REGNUM (regno))
14892 return mode == SImode;
14894 if (IS_IWMMXT_REGNUM (regno))
14895 return VALID_IWMMXT_REG_MODE (mode);
14898 /* We allow almost any value to be stored in the general registers.
14899 Restrict doubleword quantities to even register pairs so that we can
14900 use ldrd. Do not allow very large Neon structure opaque modes in
14901 general registers; they would use too many. */
14902 if (regno <= LAST_ARM_REGNUM)
14903 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14904 && ARM_NUM_REGS (mode) <= 4;
14906 if (regno == FRAME_POINTER_REGNUM
14907 || regno == ARG_POINTER_REGNUM)
14908 /* We only allow integers in the fake hard registers. */
14909 return GET_MODE_CLASS (mode) == MODE_INT;
14911 /* The only registers left are the FPA registers
14912 which we only allow to hold FP values. */
14913 return (TARGET_HARD_FLOAT && TARGET_FPA
14914 && GET_MODE_CLASS (mode) == MODE_FLOAT
14915 && regno >= FIRST_FPA_REGNUM
14916 && regno <= LAST_FPA_REGNUM);
14919 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14920 not used in arm mode. */
14923 arm_regno_class (int regno)
14927 if (regno == STACK_POINTER_REGNUM)
14929 if (regno == CC_REGNUM)
14936 if (TARGET_THUMB2 && regno < 8)
14939 if ( regno <= LAST_ARM_REGNUM
14940 || regno == FRAME_POINTER_REGNUM
14941 || regno == ARG_POINTER_REGNUM)
14942 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14944 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14945 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14947 if (IS_CIRRUS_REGNUM (regno))
14948 return CIRRUS_REGS;
14950 if (IS_VFP_REGNUM (regno))
14952 if (regno <= D7_VFP_REGNUM)
14953 return VFP_D0_D7_REGS;
14954 else if (regno <= LAST_LO_VFP_REGNUM)
14955 return VFP_LO_REGS;
14957 return VFP_HI_REGS;
14960 if (IS_IWMMXT_REGNUM (regno))
14961 return IWMMXT_REGS;
14963 if (IS_IWMMXT_GR_REGNUM (regno))
14964 return IWMMXT_GR_REGS;
14969 /* Handle a special case when computing the offset
14970 of an argument from the frame pointer. */
14972 arm_debugger_arg_offset (int value, rtx addr)
14976 /* We are only interested if dbxout_parms() failed to compute the offset. */
14980 /* We can only cope with the case where the address is held in a register. */
14981 if (GET_CODE (addr) != REG)
14984 /* If we are using the frame pointer to point at the argument, then
14985 an offset of 0 is correct. */
14986 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14989 /* If we are using the stack pointer to point at the
14990 argument, then an offset of 0 is correct. */
14991 /* ??? Check this is consistent with thumb2 frame layout. */
14992 if ((TARGET_THUMB || !frame_pointer_needed)
14993 && REGNO (addr) == SP_REGNUM)
14996 /* Oh dear. The argument is pointed to by a register rather
14997 than being held in a register, or being stored at a known
14998 offset from the frame pointer. Since GDB only understands
14999 those two kinds of argument we must translate the address
15000 held in the register into an offset from the frame pointer.
15001 We do this by searching through the insns for the function
15002 looking to see where this register gets its value. If the
15003 register is initialized from the frame pointer plus an offset
15004 then we are in luck and we can continue, otherwise we give up.
15006 This code is exercised by producing debugging information
15007 for a function with arguments like this:
15009 double func (double a, double b, int c, double d) {return d;}
15011 Without this code the stab for parameter 'd' will be set to
15012 an offset of 0 from the frame pointer, rather than 8. */
15014 /* The if() statement says:
15016 If the insn is a normal instruction
15017 and if the insn is setting the value in a register
15018 and if the register being set is the register holding the address of the argument
15019 and if the address is computing by an addition
15020 that involves adding to a register
15021 which is the frame pointer
15026 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15028 if ( GET_CODE (insn) == INSN
15029 && GET_CODE (PATTERN (insn)) == SET
15030 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
15031 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
15032 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
15033 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
15034 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
15037 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
15046 warning (0, "unable to compute real location of stacked parameter");
15047 value = 8; /* XXX magic hack */
15053 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
15056 if ((MASK) & insn_flags) \
15057 add_builtin_function ((NAME), (TYPE), (CODE), \
15058 BUILT_IN_MD, NULL, NULL_TREE); \
15062 struct builtin_description
15064 const unsigned int mask;
15065 const enum insn_code icode;
15066 const char * const name;
15067 const enum arm_builtins code;
15068 const enum rtx_code comparison;
15069 const unsigned int flag;
15072 static const struct builtin_description bdesc_2arg[] =
15074 #define IWMMXT_BUILTIN(code, string, builtin) \
15075 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
15076 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
15078 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
15079 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
15080 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
15081 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
15082 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
15083 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
15084 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
15085 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
15086 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
15087 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
15088 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
15089 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
15090 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
15091 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
15092 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
15093 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
15094 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
15095 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
15096 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
15097 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
15098 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
15099 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
15100 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
15101 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
15102 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
15103 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
15104 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
15105 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
15106 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
15107 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
15108 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
15109 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
15110 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
15111 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
15112 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
15113 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
15114 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
15115 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
15116 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
15117 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
15118 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
15119 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
15120 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
15121 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
15122 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
15123 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
15124 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
15125 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
15126 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
15127 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
15128 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
15129 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
15130 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
15131 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
15132 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
15133 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
15134 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
15135 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
15137 #define IWMMXT_BUILTIN2(code, builtin) \
15138 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
15140 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
15141 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
15142 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
15143 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
15144 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
15145 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
15146 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
15147 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
15148 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
15149 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
15150 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
15151 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
15152 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
15153 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
15154 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
15155 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
15156 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
15157 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
15158 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
15159 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
15160 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
15161 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
15162 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
15163 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
15164 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
15165 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
15166 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
15167 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
15168 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
15169 IWMMXT_BUILTIN2 (rordi3, WRORDI)
15170 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
15171 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
15174 static const struct builtin_description bdesc_1arg[] =
15176 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
15177 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
15178 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
15179 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
15180 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
15181 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
15182 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
15183 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
15184 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
15185 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
15186 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
15187 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
15188 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
15189 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
15190 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
15191 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
15192 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
15193 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
15196 /* Set up all the iWMMXt builtins. This is
15197 not called if TARGET_IWMMXT is zero. */
15200 arm_init_iwmmxt_builtins (void)
15202 const struct builtin_description * d;
15204 tree endlink = void_list_node;
15206 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
15207 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
15208 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
15211 = build_function_type (integer_type_node,
15212 tree_cons (NULL_TREE, integer_type_node, endlink));
15213 tree v8qi_ftype_v8qi_v8qi_int
15214 = build_function_type (V8QI_type_node,
15215 tree_cons (NULL_TREE, V8QI_type_node,
15216 tree_cons (NULL_TREE, V8QI_type_node,
15217 tree_cons (NULL_TREE,
15220 tree v4hi_ftype_v4hi_int
15221 = build_function_type (V4HI_type_node,
15222 tree_cons (NULL_TREE, V4HI_type_node,
15223 tree_cons (NULL_TREE, integer_type_node,
15225 tree v2si_ftype_v2si_int
15226 = build_function_type (V2SI_type_node,
15227 tree_cons (NULL_TREE, V2SI_type_node,
15228 tree_cons (NULL_TREE, integer_type_node,
15230 tree v2si_ftype_di_di
15231 = build_function_type (V2SI_type_node,
15232 tree_cons (NULL_TREE, long_long_integer_type_node,
15233 tree_cons (NULL_TREE, long_long_integer_type_node,
15235 tree di_ftype_di_int
15236 = build_function_type (long_long_integer_type_node,
15237 tree_cons (NULL_TREE, long_long_integer_type_node,
15238 tree_cons (NULL_TREE, integer_type_node,
15240 tree di_ftype_di_int_int
15241 = build_function_type (long_long_integer_type_node,
15242 tree_cons (NULL_TREE, long_long_integer_type_node,
15243 tree_cons (NULL_TREE, integer_type_node,
15244 tree_cons (NULL_TREE,
15247 tree int_ftype_v8qi
15248 = build_function_type (integer_type_node,
15249 tree_cons (NULL_TREE, V8QI_type_node,
15251 tree int_ftype_v4hi
15252 = build_function_type (integer_type_node,
15253 tree_cons (NULL_TREE, V4HI_type_node,
15255 tree int_ftype_v2si
15256 = build_function_type (integer_type_node,
15257 tree_cons (NULL_TREE, V2SI_type_node,
15259 tree int_ftype_v8qi_int
15260 = build_function_type (integer_type_node,
15261 tree_cons (NULL_TREE, V8QI_type_node,
15262 tree_cons (NULL_TREE, integer_type_node,
15264 tree int_ftype_v4hi_int
15265 = build_function_type (integer_type_node,
15266 tree_cons (NULL_TREE, V4HI_type_node,
15267 tree_cons (NULL_TREE, integer_type_node,
15269 tree int_ftype_v2si_int
15270 = build_function_type (integer_type_node,
15271 tree_cons (NULL_TREE, V2SI_type_node,
15272 tree_cons (NULL_TREE, integer_type_node,
15274 tree v8qi_ftype_v8qi_int_int
15275 = build_function_type (V8QI_type_node,
15276 tree_cons (NULL_TREE, V8QI_type_node,
15277 tree_cons (NULL_TREE, integer_type_node,
15278 tree_cons (NULL_TREE,
15281 tree v4hi_ftype_v4hi_int_int
15282 = build_function_type (V4HI_type_node,
15283 tree_cons (NULL_TREE, V4HI_type_node,
15284 tree_cons (NULL_TREE, integer_type_node,
15285 tree_cons (NULL_TREE,
15288 tree v2si_ftype_v2si_int_int
15289 = build_function_type (V2SI_type_node,
15290 tree_cons (NULL_TREE, V2SI_type_node,
15291 tree_cons (NULL_TREE, integer_type_node,
15292 tree_cons (NULL_TREE,
15295 /* Miscellaneous. */
15296 tree v8qi_ftype_v4hi_v4hi
15297 = build_function_type (V8QI_type_node,
15298 tree_cons (NULL_TREE, V4HI_type_node,
15299 tree_cons (NULL_TREE, V4HI_type_node,
15301 tree v4hi_ftype_v2si_v2si
15302 = build_function_type (V4HI_type_node,
15303 tree_cons (NULL_TREE, V2SI_type_node,
15304 tree_cons (NULL_TREE, V2SI_type_node,
15306 tree v2si_ftype_v4hi_v4hi
15307 = build_function_type (V2SI_type_node,
15308 tree_cons (NULL_TREE, V4HI_type_node,
15309 tree_cons (NULL_TREE, V4HI_type_node,
15311 tree v2si_ftype_v8qi_v8qi
15312 = build_function_type (V2SI_type_node,
15313 tree_cons (NULL_TREE, V8QI_type_node,
15314 tree_cons (NULL_TREE, V8QI_type_node,
15316 tree v4hi_ftype_v4hi_di
15317 = build_function_type (V4HI_type_node,
15318 tree_cons (NULL_TREE, V4HI_type_node,
15319 tree_cons (NULL_TREE,
15320 long_long_integer_type_node,
15322 tree v2si_ftype_v2si_di
15323 = build_function_type (V2SI_type_node,
15324 tree_cons (NULL_TREE, V2SI_type_node,
15325 tree_cons (NULL_TREE,
15326 long_long_integer_type_node,
15328 tree void_ftype_int_int
15329 = build_function_type (void_type_node,
15330 tree_cons (NULL_TREE, integer_type_node,
15331 tree_cons (NULL_TREE, integer_type_node,
15334 = build_function_type (long_long_unsigned_type_node, endlink);
15336 = build_function_type (long_long_integer_type_node,
15337 tree_cons (NULL_TREE, V8QI_type_node,
15340 = build_function_type (long_long_integer_type_node,
15341 tree_cons (NULL_TREE, V4HI_type_node,
15344 = build_function_type (long_long_integer_type_node,
15345 tree_cons (NULL_TREE, V2SI_type_node,
15347 tree v2si_ftype_v4hi
15348 = build_function_type (V2SI_type_node,
15349 tree_cons (NULL_TREE, V4HI_type_node,
15351 tree v4hi_ftype_v8qi
15352 = build_function_type (V4HI_type_node,
15353 tree_cons (NULL_TREE, V8QI_type_node,
15356 tree di_ftype_di_v4hi_v4hi
15357 = build_function_type (long_long_unsigned_type_node,
15358 tree_cons (NULL_TREE,
15359 long_long_unsigned_type_node,
15360 tree_cons (NULL_TREE, V4HI_type_node,
15361 tree_cons (NULL_TREE,
15365 tree di_ftype_v4hi_v4hi
15366 = build_function_type (long_long_unsigned_type_node,
15367 tree_cons (NULL_TREE, V4HI_type_node,
15368 tree_cons (NULL_TREE, V4HI_type_node,
15371 /* Normal vector binops. */
15372 tree v8qi_ftype_v8qi_v8qi
15373 = build_function_type (V8QI_type_node,
15374 tree_cons (NULL_TREE, V8QI_type_node,
15375 tree_cons (NULL_TREE, V8QI_type_node,
15377 tree v4hi_ftype_v4hi_v4hi
15378 = build_function_type (V4HI_type_node,
15379 tree_cons (NULL_TREE, V4HI_type_node,
15380 tree_cons (NULL_TREE, V4HI_type_node,
15382 tree v2si_ftype_v2si_v2si
15383 = build_function_type (V2SI_type_node,
15384 tree_cons (NULL_TREE, V2SI_type_node,
15385 tree_cons (NULL_TREE, V2SI_type_node,
15387 tree di_ftype_di_di
15388 = build_function_type (long_long_unsigned_type_node,
15389 tree_cons (NULL_TREE, long_long_unsigned_type_node,
15390 tree_cons (NULL_TREE,
15391 long_long_unsigned_type_node,
15394 /* Add all builtins that are more or less simple operations on two
15396 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15398 /* Use one of the operands; the target can have a different mode for
15399 mask-generating compares. */
15400 enum machine_mode mode;
15406 mode = insn_data[d->icode].operand[1].mode;
15411 type = v8qi_ftype_v8qi_v8qi;
15414 type = v4hi_ftype_v4hi_v4hi;
15417 type = v2si_ftype_v2si_v2si;
15420 type = di_ftype_di_di;
15424 gcc_unreachable ();
15427 def_mbuiltin (d->mask, d->name, type, d->code);
15430 /* Add the remaining MMX insns with somewhat more complicated types. */
15431 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
15432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
15433 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
15435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
15436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
15437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
15438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
15439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
15440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
15442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
15443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
15444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
15445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
15446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
15447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
15449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
15450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
15451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
15452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
15453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
15454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
15456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
15457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
15458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
15459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
15460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
15461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
15463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
15465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
15466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
15467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
15468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
15470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
15471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
15472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
15473 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
15474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
15475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
15476 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
15477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
15478 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
15480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
15481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
15482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
15484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
15485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
15486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
15488 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
15489 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
15490 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
15491 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
15492 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
15493 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
15495 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
15496 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
15497 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
15498 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
15499 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
15500 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
15501 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
15502 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
15503 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
15504 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
15505 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
15506 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
15508 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
15509 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
15510 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
15511 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
15513 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
15514 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
15515 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
15516 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
15517 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
15518 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
15519 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
15523 arm_init_tls_builtins (void)
15527 ftype = build_function_type (ptr_type_node, void_list_node);
15528 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
15529 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
15531 TREE_NOTHROW (decl) = 1;
15532 TREE_READONLY (decl) = 1;
15535 enum neon_builtin_type_bits {
15551 #define v8qi_UP T_V8QI
15552 #define v4hi_UP T_V4HI
15553 #define v2si_UP T_V2SI
15554 #define v2sf_UP T_V2SF
15556 #define v16qi_UP T_V16QI
15557 #define v8hi_UP T_V8HI
15558 #define v4si_UP T_V4SI
15559 #define v4sf_UP T_V4SF
15560 #define v2di_UP T_V2DI
15565 #define UP(X) X##_UP
15600 NEON_LOADSTRUCTLANE,
15602 NEON_STORESTRUCTLANE,
15611 const neon_itype itype;
15613 const enum insn_code codes[T_MAX];
15614 const unsigned int num_vars;
15615 unsigned int base_fcode;
15616 } neon_builtin_datum;
15618 #define CF(N,X) CODE_FOR_neon_##N##X
15620 #define VAR1(T, N, A) \
15621 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
15622 #define VAR2(T, N, A, B) \
15623 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
15624 #define VAR3(T, N, A, B, C) \
15625 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
15626 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
15627 #define VAR4(T, N, A, B, C, D) \
15628 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
15629 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
15630 #define VAR5(T, N, A, B, C, D, E) \
15631 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
15632 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
15633 #define VAR6(T, N, A, B, C, D, E, F) \
15634 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
15635 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
15636 #define VAR7(T, N, A, B, C, D, E, F, G) \
15637 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
15638 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15640 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
15641 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15643 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15644 CF (N, G), CF (N, H) }, 8, 0
15645 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
15646 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15647 | UP (H) | UP (I), \
15648 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15649 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
15650 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
15651 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
15652 | UP (H) | UP (I) | UP (J), \
15653 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
15654 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
15656 /* The mode entries in the following table correspond to the "key" type of the
15657 instruction variant, i.e. equivalent to that which would be specified after
15658 the assembler mnemonic, which usually refers to the last vector operand.
15659 (Signed/unsigned/polynomial types are not differentiated between though, and
15660 are all mapped onto the same mode for a given element size.) The modes
15661 listed per instruction should be the same as those defined for that
15662 instruction's pattern in neon.md.
15663 WARNING: Variants should be listed in the same increasing order as
15664 neon_builtin_type_bits. */
15666 static neon_builtin_datum neon_builtin_data[] =
15668 { VAR10 (BINOP, vadd,
15669 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15670 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
15671 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
15672 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15673 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15674 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
15675 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15676 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15677 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
15678 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15679 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
15680 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
15681 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
15682 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
15683 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
15684 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
15685 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
15686 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
15687 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
15688 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
15689 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
15690 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
15691 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15692 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15693 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15694 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
15695 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
15696 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
15697 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15698 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15699 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15700 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
15701 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15702 { VAR10 (BINOP, vsub,
15703 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15704 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
15705 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
15706 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15707 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15708 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
15709 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15710 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15711 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15712 { VAR2 (BINOP, vcage, v2sf, v4sf) },
15713 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
15714 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15715 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15716 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
15717 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15718 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
15719 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15720 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15721 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
15722 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15723 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15724 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
15725 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
15726 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
15727 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
15728 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15729 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
15730 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15731 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15732 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15733 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15734 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15735 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15736 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
15737 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
15738 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
15739 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
15740 /* FIXME: vget_lane supports more variants than this! */
15741 { VAR10 (GETLANE, vget_lane,
15742 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15743 { VAR10 (SETLANE, vset_lane,
15744 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15745 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
15746 { VAR10 (DUP, vdup_n,
15747 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15748 { VAR10 (DUPLANE, vdup_lane,
15749 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15750 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
15751 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
15752 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
15753 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
15754 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
15755 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
15756 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
15757 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15758 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15759 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
15760 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
15761 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15762 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
15763 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
15764 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15765 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15766 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
15767 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
15768 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15769 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
15770 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
15771 { VAR10 (BINOP, vext,
15772 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15773 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15774 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
15775 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
15776 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
15777 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
15778 { VAR10 (SELECT, vbsl,
15779 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15780 { VAR1 (VTBL, vtbl1, v8qi) },
15781 { VAR1 (VTBL, vtbl2, v8qi) },
15782 { VAR1 (VTBL, vtbl3, v8qi) },
15783 { VAR1 (VTBL, vtbl4, v8qi) },
15784 { VAR1 (VTBX, vtbx1, v8qi) },
15785 { VAR1 (VTBX, vtbx2, v8qi) },
15786 { VAR1 (VTBX, vtbx3, v8qi) },
15787 { VAR1 (VTBX, vtbx4, v8qi) },
15788 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15789 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15790 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
15791 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
15792 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
15793 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
15794 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
15795 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
15796 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
15797 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
15798 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
15799 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
15800 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
15801 { VAR10 (LOAD1, vld1,
15802 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15803 { VAR10 (LOAD1LANE, vld1_lane,
15804 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15805 { VAR10 (LOAD1, vld1_dup,
15806 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15807 { VAR10 (STORE1, vst1,
15808 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15809 { VAR10 (STORE1LANE, vst1_lane,
15810 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15811 { VAR9 (LOADSTRUCT,
15812 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15813 { VAR7 (LOADSTRUCTLANE, vld2_lane,
15814 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15815 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
15816 { VAR9 (STORESTRUCT, vst2,
15817 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15818 { VAR7 (STORESTRUCTLANE, vst2_lane,
15819 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15820 { VAR9 (LOADSTRUCT,
15821 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15822 { VAR7 (LOADSTRUCTLANE, vld3_lane,
15823 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15824 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
15825 { VAR9 (STORESTRUCT, vst3,
15826 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15827 { VAR7 (STORESTRUCTLANE, vst3_lane,
15828 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15829 { VAR9 (LOADSTRUCT, vld4,
15830 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15831 { VAR7 (LOADSTRUCTLANE, vld4_lane,
15832 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15833 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
15834 { VAR9 (STORESTRUCT, vst4,
15835 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
15836 { VAR7 (STORESTRUCTLANE, vst4_lane,
15837 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
15838 { VAR10 (LOGICBINOP, vand,
15839 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15840 { VAR10 (LOGICBINOP, vorr,
15841 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15842 { VAR10 (BINOP, veor,
15843 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15844 { VAR10 (LOGICBINOP, vbic,
15845 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
15846 { VAR10 (LOGICBINOP, vorn,
15847 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
15863 arm_init_neon_builtins (void)
15865 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15867 tree neon_intQI_type_node;
15868 tree neon_intHI_type_node;
15869 tree neon_polyQI_type_node;
15870 tree neon_polyHI_type_node;
15871 tree neon_intSI_type_node;
15872 tree neon_intDI_type_node;
15873 tree neon_float_type_node;
15875 tree intQI_pointer_node;
15876 tree intHI_pointer_node;
15877 tree intSI_pointer_node;
15878 tree intDI_pointer_node;
15879 tree float_pointer_node;
15881 tree const_intQI_node;
15882 tree const_intHI_node;
15883 tree const_intSI_node;
15884 tree const_intDI_node;
15885 tree const_float_node;
15887 tree const_intQI_pointer_node;
15888 tree const_intHI_pointer_node;
15889 tree const_intSI_pointer_node;
15890 tree const_intDI_pointer_node;
15891 tree const_float_pointer_node;
15893 tree V8QI_type_node;
15894 tree V4HI_type_node;
15895 tree V2SI_type_node;
15896 tree V2SF_type_node;
15897 tree V16QI_type_node;
15898 tree V8HI_type_node;
15899 tree V4SI_type_node;
15900 tree V4SF_type_node;
15901 tree V2DI_type_node;
15903 tree intUQI_type_node;
15904 tree intUHI_type_node;
15905 tree intUSI_type_node;
15906 tree intUDI_type_node;
15908 tree intEI_type_node;
15909 tree intOI_type_node;
15910 tree intCI_type_node;
15911 tree intXI_type_node;
15913 tree V8QI_pointer_node;
15914 tree V4HI_pointer_node;
15915 tree V2SI_pointer_node;
15916 tree V2SF_pointer_node;
15917 tree V16QI_pointer_node;
15918 tree V8HI_pointer_node;
15919 tree V4SI_pointer_node;
15920 tree V4SF_pointer_node;
15921 tree V2DI_pointer_node;
15923 tree void_ftype_pv8qi_v8qi_v8qi;
15924 tree void_ftype_pv4hi_v4hi_v4hi;
15925 tree void_ftype_pv2si_v2si_v2si;
15926 tree void_ftype_pv2sf_v2sf_v2sf;
15927 tree void_ftype_pdi_di_di;
15928 tree void_ftype_pv16qi_v16qi_v16qi;
15929 tree void_ftype_pv8hi_v8hi_v8hi;
15930 tree void_ftype_pv4si_v4si_v4si;
15931 tree void_ftype_pv4sf_v4sf_v4sf;
15932 tree void_ftype_pv2di_v2di_v2di;
15934 tree reinterp_ftype_dreg[5][5];
15935 tree reinterp_ftype_qreg[5][5];
15936 tree dreg_types[5], qreg_types[5];
15938 /* Create distinguished type nodes for NEON vector element types,
15939 and pointers to values of such types, so we can detect them later. */
15940 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15941 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15942 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15943 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15944 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15945 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15946 neon_float_type_node = make_node (REAL_TYPE);
15947 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15948 layout_type (neon_float_type_node);
15950 /* Define typedefs which exactly correspond to the modes we are basing vector
15951 types on. If you change these names you'll need to change
15952 the table used by arm_mangle_type too. */
15953 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15954 "__builtin_neon_qi");
15955 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15956 "__builtin_neon_hi");
15957 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15958 "__builtin_neon_si");
15959 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15960 "__builtin_neon_sf");
15961 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15962 "__builtin_neon_di");
15963 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15964 "__builtin_neon_poly8");
15965 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15966 "__builtin_neon_poly16");
15968 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15969 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15970 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15971 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15972 float_pointer_node = build_pointer_type (neon_float_type_node);
15974 /* Next create constant-qualified versions of the above types. */
15975 const_intQI_node = build_qualified_type (neon_intQI_type_node,
15977 const_intHI_node = build_qualified_type (neon_intHI_type_node,
15979 const_intSI_node = build_qualified_type (neon_intSI_type_node,
15981 const_intDI_node = build_qualified_type (neon_intDI_type_node,
15983 const_float_node = build_qualified_type (neon_float_type_node,
15986 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15987 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15988 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15989 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15990 const_float_pointer_node = build_pointer_type (const_float_node);
15992 /* Now create vector types based on our NEON element types. */
15993 /* 64-bit vectors. */
15995 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15997 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15999 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
16001 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
16002 /* 128-bit vectors. */
16004 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
16006 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
16008 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
16010 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
16012 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
16014 /* Unsigned integer types for various mode sizes. */
16015 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
16016 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
16017 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
16018 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
16020 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
16021 "__builtin_neon_uqi");
16022 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
16023 "__builtin_neon_uhi");
16024 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
16025 "__builtin_neon_usi");
16026 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
16027 "__builtin_neon_udi");
16029 /* Opaque integer types for structures of vectors. */
16030 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
16031 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
16032 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
16033 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
16035 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
16036 "__builtin_neon_ti");
16037 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
16038 "__builtin_neon_ei");
16039 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
16040 "__builtin_neon_oi");
16041 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
16042 "__builtin_neon_ci");
16043 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
16044 "__builtin_neon_xi");
16046 /* Pointers to vector types. */
16047 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
16048 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
16049 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
16050 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
16051 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
16052 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
16053 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
16054 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
16055 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
16057 /* Operations which return results as pairs. */
16058 void_ftype_pv8qi_v8qi_v8qi =
16059 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
16060 V8QI_type_node, NULL);
16061 void_ftype_pv4hi_v4hi_v4hi =
16062 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
16063 V4HI_type_node, NULL);
16064 void_ftype_pv2si_v2si_v2si =
16065 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
16066 V2SI_type_node, NULL);
16067 void_ftype_pv2sf_v2sf_v2sf =
16068 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
16069 V2SF_type_node, NULL);
16070 void_ftype_pdi_di_di =
16071 build_function_type_list (void_type_node, intDI_pointer_node,
16072 neon_intDI_type_node, neon_intDI_type_node, NULL);
16073 void_ftype_pv16qi_v16qi_v16qi =
16074 build_function_type_list (void_type_node, V16QI_pointer_node,
16075 V16QI_type_node, V16QI_type_node, NULL);
16076 void_ftype_pv8hi_v8hi_v8hi =
16077 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
16078 V8HI_type_node, NULL);
16079 void_ftype_pv4si_v4si_v4si =
16080 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
16081 V4SI_type_node, NULL);
16082 void_ftype_pv4sf_v4sf_v4sf =
16083 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
16084 V4SF_type_node, NULL);
16085 void_ftype_pv2di_v2di_v2di =
16086 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
16087 V2DI_type_node, NULL);
16089 dreg_types[0] = V8QI_type_node;
16090 dreg_types[1] = V4HI_type_node;
16091 dreg_types[2] = V2SI_type_node;
16092 dreg_types[3] = V2SF_type_node;
16093 dreg_types[4] = neon_intDI_type_node;
16095 qreg_types[0] = V16QI_type_node;
16096 qreg_types[1] = V8HI_type_node;
16097 qreg_types[2] = V4SI_type_node;
16098 qreg_types[3] = V4SF_type_node;
16099 qreg_types[4] = V2DI_type_node;
16101 for (i = 0; i < 5; i++)
16104 for (j = 0; j < 5; j++)
16106 reinterp_ftype_dreg[i][j]
16107 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
16108 reinterp_ftype_qreg[i][j]
16109 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
16113 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
16115 neon_builtin_datum *d = &neon_builtin_data[i];
16116 unsigned int j, codeidx = 0;
16118 d->base_fcode = fcode;
16120 for (j = 0; j < T_MAX; j++)
16122 const char* const modenames[] = {
16123 "v8qi", "v4hi", "v2si", "v2sf", "di",
16124 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
16128 enum insn_code icode;
16129 int is_load = 0, is_store = 0;
16131 if ((d->bits & (1 << j)) == 0)
16134 icode = d->codes[codeidx++];
16139 case NEON_LOAD1LANE:
16140 case NEON_LOADSTRUCT:
16141 case NEON_LOADSTRUCTLANE:
16143 /* Fall through. */
16145 case NEON_STORE1LANE:
16146 case NEON_STORESTRUCT:
16147 case NEON_STORESTRUCTLANE:
16150 /* Fall through. */
16153 case NEON_LOGICBINOP:
16154 case NEON_SHIFTINSERT:
16161 case NEON_SHIFTIMM:
16162 case NEON_SHIFTACC:
16168 case NEON_LANEMULL:
16169 case NEON_LANEMULH:
16171 case NEON_SCALARMUL:
16172 case NEON_SCALARMULL:
16173 case NEON_SCALARMULH:
16174 case NEON_SCALARMAC:
16180 tree return_type = void_type_node, args = void_list_node;
16182 /* Build a function type directly from the insn_data for this
16183 builtin. The build_function_type() function takes care of
16184 removing duplicates for us. */
16185 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
16189 if (is_load && k == 1)
16191 /* Neon load patterns always have the memory operand
16192 (a SImode pointer) in the operand 1 position. We
16193 want a const pointer to the element type in that
16195 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16201 eltype = const_intQI_pointer_node;
16206 eltype = const_intHI_pointer_node;
16211 eltype = const_intSI_pointer_node;
16216 eltype = const_float_pointer_node;
16221 eltype = const_intDI_pointer_node;
16224 default: gcc_unreachable ();
16227 else if (is_store && k == 0)
16229 /* Similarly, Neon store patterns use operand 0 as
16230 the memory location to store to (a SImode pointer).
16231 Use a pointer to the element type of the store in
16233 gcc_assert (insn_data[icode].operand[k].mode == SImode);
16239 eltype = intQI_pointer_node;
16244 eltype = intHI_pointer_node;
16249 eltype = intSI_pointer_node;
16254 eltype = float_pointer_node;
16259 eltype = intDI_pointer_node;
16262 default: gcc_unreachable ();
16267 switch (insn_data[icode].operand[k].mode)
16269 case VOIDmode: eltype = void_type_node; break;
16271 case QImode: eltype = neon_intQI_type_node; break;
16272 case HImode: eltype = neon_intHI_type_node; break;
16273 case SImode: eltype = neon_intSI_type_node; break;
16274 case SFmode: eltype = neon_float_type_node; break;
16275 case DImode: eltype = neon_intDI_type_node; break;
16276 case TImode: eltype = intTI_type_node; break;
16277 case EImode: eltype = intEI_type_node; break;
16278 case OImode: eltype = intOI_type_node; break;
16279 case CImode: eltype = intCI_type_node; break;
16280 case XImode: eltype = intXI_type_node; break;
16281 /* 64-bit vectors. */
16282 case V8QImode: eltype = V8QI_type_node; break;
16283 case V4HImode: eltype = V4HI_type_node; break;
16284 case V2SImode: eltype = V2SI_type_node; break;
16285 case V2SFmode: eltype = V2SF_type_node; break;
16286 /* 128-bit vectors. */
16287 case V16QImode: eltype = V16QI_type_node; break;
16288 case V8HImode: eltype = V8HI_type_node; break;
16289 case V4SImode: eltype = V4SI_type_node; break;
16290 case V4SFmode: eltype = V4SF_type_node; break;
16291 case V2DImode: eltype = V2DI_type_node; break;
16292 default: gcc_unreachable ();
16296 if (k == 0 && !is_store)
16297 return_type = eltype;
16299 args = tree_cons (NULL_TREE, eltype, args);
16302 ftype = build_function_type (return_type, args);
16306 case NEON_RESULTPAIR:
16308 switch (insn_data[icode].operand[1].mode)
16310 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
16311 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
16312 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
16313 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
16314 case DImode: ftype = void_ftype_pdi_di_di; break;
16315 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
16316 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
16317 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
16318 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
16319 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
16320 default: gcc_unreachable ();
16325 case NEON_REINTERP:
16327 /* We iterate over 5 doubleword types, then 5 quadword
16330 switch (insn_data[icode].operand[0].mode)
16332 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
16333 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
16334 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
16335 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
16336 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
16337 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
16338 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
16339 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
16340 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
16341 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
16342 default: gcc_unreachable ();
16348 gcc_unreachable ();
16351 gcc_assert (ftype != NULL);
16353 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
16355 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
16362 arm_init_fp16_builtins (void)
16364 tree fp16_type = make_node (REAL_TYPE);
16365 TYPE_PRECISION (fp16_type) = 16;
16366 layout_type (fp16_type);
16367 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
16371 arm_init_builtins (void)
16373 arm_init_tls_builtins ();
16375 if (TARGET_REALLY_IWMMXT)
16376 arm_init_iwmmxt_builtins ();
16379 arm_init_neon_builtins ();
16381 if (arm_fp16_format)
16382 arm_init_fp16_builtins ();
16385 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
16387 static const char *
16388 arm_invalid_parameter_type (const_tree t)
16390 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16391 return N_("function parameters cannot have __fp16 type");
16395 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
16397 static const char *
16398 arm_invalid_return_type (const_tree t)
16400 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16401 return N_("functions cannot return __fp16 type");
16405 /* Implement TARGET_PROMOTED_TYPE. */
16408 arm_promoted_type (const_tree t)
16410 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
16411 return float_type_node;
16415 /* Implement TARGET_CONVERT_TO_TYPE.
16416 Specifically, this hook implements the peculiarity of the ARM
16417 half-precision floating-point C semantics that requires conversions between
16418 __fp16 to or from double to do an intermediate conversion to float. */
16421 arm_convert_to_type (tree type, tree expr)
16423 tree fromtype = TREE_TYPE (expr);
16424 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
16426 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
16427 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
16428 return convert (type, convert (float_type_node, expr));
16432 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
16433 This simply adds HFmode as a supported mode; even though we don't
16434 implement arithmetic on this type directly, it's supported by
16435 optabs conversions, much the way the double-word arithmetic is
16436 special-cased in the default hook. */
16439 arm_scalar_mode_supported_p (enum machine_mode mode)
16441 if (mode == HFmode)
16442 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
16444 return default_scalar_mode_supported_p (mode);
16447 /* Errors in the source file can cause expand_expr to return const0_rtx
16448 where we expect a vector. To avoid crashing, use one of the vector
16449 clear instructions. */
16452 safe_vector_operand (rtx x, enum machine_mode mode)
16454 if (x != const0_rtx)
16456 x = gen_reg_rtx (mode);
16458 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
16459 : gen_rtx_SUBREG (DImode, x, 0)));
16463 /* Subroutine of arm_expand_builtin to take care of binop insns. */
16466 arm_expand_binop_builtin (enum insn_code icode,
16467 tree exp, rtx target)
16470 tree arg0 = CALL_EXPR_ARG (exp, 0);
16471 tree arg1 = CALL_EXPR_ARG (exp, 1);
16472 rtx op0 = expand_normal (arg0);
16473 rtx op1 = expand_normal (arg1);
16474 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16475 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16476 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16478 if (VECTOR_MODE_P (mode0))
16479 op0 = safe_vector_operand (op0, mode0);
16480 if (VECTOR_MODE_P (mode1))
16481 op1 = safe_vector_operand (op1, mode1);
16484 || GET_MODE (target) != tmode
16485 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16486 target = gen_reg_rtx (tmode);
16488 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
16490 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16491 op0 = copy_to_mode_reg (mode0, op0);
16492 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16493 op1 = copy_to_mode_reg (mode1, op1);
16495 pat = GEN_FCN (icode) (target, op0, op1);
16502 /* Subroutine of arm_expand_builtin to take care of unop insns. */
16505 arm_expand_unop_builtin (enum insn_code icode,
16506 tree exp, rtx target, int do_load)
16509 tree arg0 = CALL_EXPR_ARG (exp, 0);
16510 rtx op0 = expand_normal (arg0);
16511 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16512 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16515 || GET_MODE (target) != tmode
16516 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16517 target = gen_reg_rtx (tmode);
16519 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16522 if (VECTOR_MODE_P (mode0))
16523 op0 = safe_vector_operand (op0, mode0);
16525 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16526 op0 = copy_to_mode_reg (mode0, op0);
16529 pat = GEN_FCN (icode) (target, op0);
16537 neon_builtin_compare (const void *a, const void *b)
16539 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
16540 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
16541 unsigned int soughtcode = key->base_fcode;
16543 if (soughtcode >= memb->base_fcode
16544 && soughtcode < memb->base_fcode + memb->num_vars)
16546 else if (soughtcode < memb->base_fcode)
16552 static enum insn_code
16553 locate_neon_builtin_icode (int fcode, neon_itype *itype)
16555 neon_builtin_datum key, *found;
16558 key.base_fcode = fcode;
16559 found = (neon_builtin_datum *)
16560 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
16561 sizeof (neon_builtin_data[0]), neon_builtin_compare);
16562 gcc_assert (found);
16563 idx = fcode - (int) found->base_fcode;
16564 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
16567 *itype = found->itype;
16569 return found->codes[idx];
16573 NEON_ARG_COPY_TO_REG,
16578 #define NEON_MAX_BUILTIN_ARGS 5
16580 /* Expand a Neon builtin. */
16582 arm_expand_neon_args (rtx target, int icode, int have_retval,
16587 tree arg[NEON_MAX_BUILTIN_ARGS];
16588 rtx op[NEON_MAX_BUILTIN_ARGS];
16589 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16590 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
16595 || GET_MODE (target) != tmode
16596 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
16597 target = gen_reg_rtx (tmode);
16599 va_start (ap, exp);
16603 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
16605 if (thisarg == NEON_ARG_STOP)
16609 arg[argc] = CALL_EXPR_ARG (exp, argc);
16610 op[argc] = expand_normal (arg[argc]);
16611 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
16615 case NEON_ARG_COPY_TO_REG:
16616 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
16617 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16618 (op[argc], mode[argc]))
16619 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
16622 case NEON_ARG_CONSTANT:
16623 /* FIXME: This error message is somewhat unhelpful. */
16624 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
16625 (op[argc], mode[argc]))
16626 error ("argument must be a constant");
16629 case NEON_ARG_STOP:
16630 gcc_unreachable ();
16643 pat = GEN_FCN (icode) (target, op[0]);
16647 pat = GEN_FCN (icode) (target, op[0], op[1]);
16651 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
16655 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
16659 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
16663 gcc_unreachable ();
16669 pat = GEN_FCN (icode) (op[0]);
16673 pat = GEN_FCN (icode) (op[0], op[1]);
16677 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
16681 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
16685 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
16689 gcc_unreachable ();
16700 /* Expand a Neon builtin. These are "special" because they don't have symbolic
16701 constants defined per-instruction or per instruction-variant. Instead, the
16702 required info is looked up in the table neon_builtin_data. */
16704 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
16707 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
16714 return arm_expand_neon_args (target, icode, 1, exp,
16715 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16719 case NEON_SCALARMUL:
16720 case NEON_SCALARMULL:
16721 case NEON_SCALARMULH:
16722 case NEON_SHIFTINSERT:
16723 case NEON_LOGICBINOP:
16724 return arm_expand_neon_args (target, icode, 1, exp,
16725 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16729 return arm_expand_neon_args (target, icode, 1, exp,
16730 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16731 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16735 case NEON_SHIFTIMM:
16736 return arm_expand_neon_args (target, icode, 1, exp,
16737 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
16741 return arm_expand_neon_args (target, icode, 1, exp,
16742 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16746 case NEON_REINTERP:
16747 return arm_expand_neon_args (target, icode, 1, exp,
16748 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16752 return arm_expand_neon_args (target, icode, 1, exp,
16753 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16755 case NEON_RESULTPAIR:
16756 return arm_expand_neon_args (target, icode, 0, exp,
16757 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16761 case NEON_LANEMULL:
16762 case NEON_LANEMULH:
16763 return arm_expand_neon_args (target, icode, 1, exp,
16764 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16765 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16768 return arm_expand_neon_args (target, icode, 1, exp,
16769 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16770 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
16772 case NEON_SHIFTACC:
16773 return arm_expand_neon_args (target, icode, 1, exp,
16774 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16775 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16777 case NEON_SCALARMAC:
16778 return arm_expand_neon_args (target, icode, 1, exp,
16779 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16780 NEON_ARG_CONSTANT, NEON_ARG_STOP);
16784 return arm_expand_neon_args (target, icode, 1, exp,
16785 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
16789 case NEON_LOADSTRUCT:
16790 return arm_expand_neon_args (target, icode, 1, exp,
16791 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16793 case NEON_LOAD1LANE:
16794 case NEON_LOADSTRUCTLANE:
16795 return arm_expand_neon_args (target, icode, 1, exp,
16796 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16800 case NEON_STORESTRUCT:
16801 return arm_expand_neon_args (target, icode, 0, exp,
16802 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
16804 case NEON_STORE1LANE:
16805 case NEON_STORESTRUCTLANE:
16806 return arm_expand_neon_args (target, icode, 0, exp,
16807 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
16811 gcc_unreachable ();
16814 /* Emit code to reinterpret one Neon type as another, without altering bits. */
16816 neon_reinterpret (rtx dest, rtx src)
16818 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
16821 /* Emit code to place a Neon pair result in memory locations (with equal
16824 neon_emit_pair_result_insn (enum machine_mode mode,
16825 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
16828 rtx mem = gen_rtx_MEM (mode, destaddr);
16829 rtx tmp1 = gen_reg_rtx (mode);
16830 rtx tmp2 = gen_reg_rtx (mode);
16832 emit_insn (intfn (tmp1, op1, tmp2, op2));
16834 emit_move_insn (mem, tmp1);
16835 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
16836 emit_move_insn (mem, tmp2);
16839 /* Set up operands for a register copy from src to dest, taking care not to
16840 clobber registers in the process.
16841 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
16842 be called with a large N, so that should be OK. */
16845 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
16847 unsigned int copied = 0, opctr = 0;
16848 unsigned int done = (1 << count) - 1;
16851 while (copied != done)
16853 for (i = 0; i < count; i++)
16857 for (j = 0; good && j < count; j++)
16858 if (i != j && (copied & (1 << j)) == 0
16859 && reg_overlap_mentioned_p (src[j], dest[i]))
16864 operands[opctr++] = dest[i];
16865 operands[opctr++] = src[i];
16871 gcc_assert (opctr == count * 2);
16874 /* Expand an expression EXP that calls a built-in function,
16875 with result going to TARGET if that's convenient
16876 (and in mode MODE if that's convenient).
16877 SUBTARGET may be used as the target for computing one of EXP's operands.
16878 IGNORE is nonzero if the value is to be ignored. */
16881 arm_expand_builtin (tree exp,
16883 rtx subtarget ATTRIBUTE_UNUSED,
16884 enum machine_mode mode ATTRIBUTE_UNUSED,
16885 int ignore ATTRIBUTE_UNUSED)
16887 const struct builtin_description * d;
16888 enum insn_code icode;
16889 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16897 int fcode = DECL_FUNCTION_CODE (fndecl);
16899 enum machine_mode tmode;
16900 enum machine_mode mode0;
16901 enum machine_mode mode1;
16902 enum machine_mode mode2;
16904 if (fcode >= ARM_BUILTIN_NEON_BASE)
16905 return arm_expand_neon_builtin (fcode, exp, target);
16909 case ARM_BUILTIN_TEXTRMSB:
16910 case ARM_BUILTIN_TEXTRMUB:
16911 case ARM_BUILTIN_TEXTRMSH:
16912 case ARM_BUILTIN_TEXTRMUH:
16913 case ARM_BUILTIN_TEXTRMSW:
16914 case ARM_BUILTIN_TEXTRMUW:
16915 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
16916 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
16917 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
16918 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
16919 : CODE_FOR_iwmmxt_textrmw);
16921 arg0 = CALL_EXPR_ARG (exp, 0);
16922 arg1 = CALL_EXPR_ARG (exp, 1);
16923 op0 = expand_normal (arg0);
16924 op1 = expand_normal (arg1);
16925 tmode = insn_data[icode].operand[0].mode;
16926 mode0 = insn_data[icode].operand[1].mode;
16927 mode1 = insn_data[icode].operand[2].mode;
16929 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16930 op0 = copy_to_mode_reg (mode0, op0);
16931 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16933 /* @@@ better error message */
16934 error ("selector must be an immediate");
16935 return gen_reg_rtx (tmode);
16938 || GET_MODE (target) != tmode
16939 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16940 target = gen_reg_rtx (tmode);
16941 pat = GEN_FCN (icode) (target, op0, op1);
16947 case ARM_BUILTIN_TINSRB:
16948 case ARM_BUILTIN_TINSRH:
16949 case ARM_BUILTIN_TINSRW:
16950 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
16951 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
16952 : CODE_FOR_iwmmxt_tinsrw);
16953 arg0 = CALL_EXPR_ARG (exp, 0);
16954 arg1 = CALL_EXPR_ARG (exp, 1);
16955 arg2 = CALL_EXPR_ARG (exp, 2);
16956 op0 = expand_normal (arg0);
16957 op1 = expand_normal (arg1);
16958 op2 = expand_normal (arg2);
16959 tmode = insn_data[icode].operand[0].mode;
16960 mode0 = insn_data[icode].operand[1].mode;
16961 mode1 = insn_data[icode].operand[2].mode;
16962 mode2 = insn_data[icode].operand[3].mode;
16964 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16965 op0 = copy_to_mode_reg (mode0, op0);
16966 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16967 op1 = copy_to_mode_reg (mode1, op1);
16968 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16970 /* @@@ better error message */
16971 error ("selector must be an immediate");
16975 || GET_MODE (target) != tmode
16976 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16977 target = gen_reg_rtx (tmode);
16978 pat = GEN_FCN (icode) (target, op0, op1, op2);
16984 case ARM_BUILTIN_SETWCX:
16985 arg0 = CALL_EXPR_ARG (exp, 0);
16986 arg1 = CALL_EXPR_ARG (exp, 1);
16987 op0 = force_reg (SImode, expand_normal (arg0));
16988 op1 = expand_normal (arg1);
16989 emit_insn (gen_iwmmxt_tmcr (op1, op0));
16992 case ARM_BUILTIN_GETWCX:
16993 arg0 = CALL_EXPR_ARG (exp, 0);
16994 op0 = expand_normal (arg0);
16995 target = gen_reg_rtx (SImode);
16996 emit_insn (gen_iwmmxt_tmrc (target, op0));
16999 case ARM_BUILTIN_WSHUFH:
17000 icode = CODE_FOR_iwmmxt_wshufh;
17001 arg0 = CALL_EXPR_ARG (exp, 0);
17002 arg1 = CALL_EXPR_ARG (exp, 1);
17003 op0 = expand_normal (arg0);
17004 op1 = expand_normal (arg1);
17005 tmode = insn_data[icode].operand[0].mode;
17006 mode1 = insn_data[icode].operand[1].mode;
17007 mode2 = insn_data[icode].operand[2].mode;
17009 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17010 op0 = copy_to_mode_reg (mode1, op0);
17011 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17013 /* @@@ better error message */
17014 error ("mask must be an immediate");
17018 || GET_MODE (target) != tmode
17019 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17020 target = gen_reg_rtx (tmode);
17021 pat = GEN_FCN (icode) (target, op0, op1);
17027 case ARM_BUILTIN_WSADB:
17028 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
17029 case ARM_BUILTIN_WSADH:
17030 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
17031 case ARM_BUILTIN_WSADBZ:
17032 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
17033 case ARM_BUILTIN_WSADHZ:
17034 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
17036 /* Several three-argument builtins. */
17037 case ARM_BUILTIN_WMACS:
17038 case ARM_BUILTIN_WMACU:
17039 case ARM_BUILTIN_WALIGN:
17040 case ARM_BUILTIN_TMIA:
17041 case ARM_BUILTIN_TMIAPH:
17042 case ARM_BUILTIN_TMIATT:
17043 case ARM_BUILTIN_TMIATB:
17044 case ARM_BUILTIN_TMIABT:
17045 case ARM_BUILTIN_TMIABB:
17046 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
17047 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
17048 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
17049 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
17050 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
17051 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
17052 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
17053 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
17054 : CODE_FOR_iwmmxt_walign);
17055 arg0 = CALL_EXPR_ARG (exp, 0);
17056 arg1 = CALL_EXPR_ARG (exp, 1);
17057 arg2 = CALL_EXPR_ARG (exp, 2);
17058 op0 = expand_normal (arg0);
17059 op1 = expand_normal (arg1);
17060 op2 = expand_normal (arg2);
17061 tmode = insn_data[icode].operand[0].mode;
17062 mode0 = insn_data[icode].operand[1].mode;
17063 mode1 = insn_data[icode].operand[2].mode;
17064 mode2 = insn_data[icode].operand[3].mode;
17066 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17067 op0 = copy_to_mode_reg (mode0, op0);
17068 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
17069 op1 = copy_to_mode_reg (mode1, op1);
17070 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17071 op2 = copy_to_mode_reg (mode2, op2);
17073 || GET_MODE (target) != tmode
17074 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17075 target = gen_reg_rtx (tmode);
17076 pat = GEN_FCN (icode) (target, op0, op1, op2);
17082 case ARM_BUILTIN_WZERO:
17083 target = gen_reg_rtx (DImode);
17084 emit_insn (gen_iwmmxt_clrdi (target));
17087 case ARM_BUILTIN_THREAD_POINTER:
17088 return arm_load_tp (target);
17094 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17095 if (d->code == (const enum arm_builtins) fcode)
17096 return arm_expand_binop_builtin (d->icode, exp, target);
17098 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17099 if (d->code == (const enum arm_builtins) fcode)
17100 return arm_expand_unop_builtin (d->icode, exp, target, 0);
17102 /* @@@ Should really do something sensible here. */
17106 /* Return the number (counting from 0) of
17107 the least significant set bit in MASK. */
17110 number_of_first_bit_set (unsigned mask)
17115 (mask & (1 << bit)) == 0;
17122 /* Emit code to push or pop registers to or from the stack. F is the
17123 assembly file. MASK is the registers to push or pop. PUSH is
17124 nonzero if we should push, and zero if we should pop. For debugging
17125 output, if pushing, adjust CFA_OFFSET by the amount of space added
17126 to the stack. REAL_REGS should have the same number of bits set as
17127 MASK, and will be used instead (in the same order) to describe which
17128 registers were saved - this is used to mark the save slots when we
17129 push high registers after moving them to low registers. */
17131 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
17132 unsigned long real_regs)
17135 int lo_mask = mask & 0xFF;
17136 int pushed_words = 0;
17140 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
17142 /* Special case. Do not generate a POP PC statement here, do it in
17144 thumb_exit (f, -1);
17148 if (ARM_EABI_UNWIND_TABLES && push)
17150 fprintf (f, "\t.save\t{");
17151 for (regno = 0; regno < 15; regno++)
17153 if (real_regs & (1 << regno))
17155 if (real_regs & ((1 << regno) -1))
17157 asm_fprintf (f, "%r", regno);
17160 fprintf (f, "}\n");
17163 fprintf (f, "\t%s\t{", push ? "push" : "pop");
17165 /* Look at the low registers first. */
17166 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
17170 asm_fprintf (f, "%r", regno);
17172 if ((lo_mask & ~1) != 0)
17179 if (push && (mask & (1 << LR_REGNUM)))
17181 /* Catch pushing the LR. */
17185 asm_fprintf (f, "%r", LR_REGNUM);
17189 else if (!push && (mask & (1 << PC_REGNUM)))
17191 /* Catch popping the PC. */
17192 if (TARGET_INTERWORK || TARGET_BACKTRACE
17193 || crtl->calls_eh_return)
17195 /* The PC is never poped directly, instead
17196 it is popped into r3 and then BX is used. */
17197 fprintf (f, "}\n");
17199 thumb_exit (f, -1);
17208 asm_fprintf (f, "%r", PC_REGNUM);
17212 fprintf (f, "}\n");
17214 if (push && pushed_words && dwarf2out_do_frame ())
17216 char *l = dwarf2out_cfi_label (false);
17217 int pushed_mask = real_regs;
17219 *cfa_offset += pushed_words * 4;
17220 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
17223 pushed_mask = real_regs;
17224 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
17226 if (pushed_mask & 1)
17227 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
17232 /* Generate code to return from a thumb function.
17233 If 'reg_containing_return_addr' is -1, then the return address is
17234 actually on the stack, at the stack pointer. */
17236 thumb_exit (FILE *f, int reg_containing_return_addr)
17238 unsigned regs_available_for_popping;
17239 unsigned regs_to_pop;
17241 unsigned available;
17245 int restore_a4 = FALSE;
17247 /* Compute the registers we need to pop. */
17251 if (reg_containing_return_addr == -1)
17253 regs_to_pop |= 1 << LR_REGNUM;
17257 if (TARGET_BACKTRACE)
17259 /* Restore the (ARM) frame pointer and stack pointer. */
17260 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
17264 /* If there is nothing to pop then just emit the BX instruction and
17266 if (pops_needed == 0)
17268 if (crtl->calls_eh_return)
17269 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17271 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17274 /* Otherwise if we are not supporting interworking and we have not created
17275 a backtrace structure and the function was not entered in ARM mode then
17276 just pop the return address straight into the PC. */
17277 else if (!TARGET_INTERWORK
17278 && !TARGET_BACKTRACE
17279 && !is_called_in_ARM_mode (current_function_decl)
17280 && !crtl->calls_eh_return)
17282 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
17286 /* Find out how many of the (return) argument registers we can corrupt. */
17287 regs_available_for_popping = 0;
17289 /* If returning via __builtin_eh_return, the bottom three registers
17290 all contain information needed for the return. */
17291 if (crtl->calls_eh_return)
17295 /* If we can deduce the registers used from the function's
17296 return value. This is more reliable that examining
17297 df_regs_ever_live_p () because that will be set if the register is
17298 ever used in the function, not just if the register is used
17299 to hold a return value. */
17301 if (crtl->return_rtx != 0)
17302 mode = GET_MODE (crtl->return_rtx);
17304 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17306 size = GET_MODE_SIZE (mode);
17310 /* In a void function we can use any argument register.
17311 In a function that returns a structure on the stack
17312 we can use the second and third argument registers. */
17313 if (mode == VOIDmode)
17314 regs_available_for_popping =
17315 (1 << ARG_REGISTER (1))
17316 | (1 << ARG_REGISTER (2))
17317 | (1 << ARG_REGISTER (3));
17319 regs_available_for_popping =
17320 (1 << ARG_REGISTER (2))
17321 | (1 << ARG_REGISTER (3));
17323 else if (size <= 4)
17324 regs_available_for_popping =
17325 (1 << ARG_REGISTER (2))
17326 | (1 << ARG_REGISTER (3));
17327 else if (size <= 8)
17328 regs_available_for_popping =
17329 (1 << ARG_REGISTER (3));
17332 /* Match registers to be popped with registers into which we pop them. */
17333 for (available = regs_available_for_popping,
17334 required = regs_to_pop;
17335 required != 0 && available != 0;
17336 available &= ~(available & - available),
17337 required &= ~(required & - required))
17340 /* If we have any popping registers left over, remove them. */
17342 regs_available_for_popping &= ~available;
17344 /* Otherwise if we need another popping register we can use
17345 the fourth argument register. */
17346 else if (pops_needed)
17348 /* If we have not found any free argument registers and
17349 reg a4 contains the return address, we must move it. */
17350 if (regs_available_for_popping == 0
17351 && reg_containing_return_addr == LAST_ARG_REGNUM)
17353 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17354 reg_containing_return_addr = LR_REGNUM;
17356 else if (size > 12)
17358 /* Register a4 is being used to hold part of the return value,
17359 but we have dire need of a free, low register. */
17362 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
17365 if (reg_containing_return_addr != LAST_ARG_REGNUM)
17367 /* The fourth argument register is available. */
17368 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
17374 /* Pop as many registers as we can. */
17375 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17376 regs_available_for_popping);
17378 /* Process the registers we popped. */
17379 if (reg_containing_return_addr == -1)
17381 /* The return address was popped into the lowest numbered register. */
17382 regs_to_pop &= ~(1 << LR_REGNUM);
17384 reg_containing_return_addr =
17385 number_of_first_bit_set (regs_available_for_popping);
17387 /* Remove this register for the mask of available registers, so that
17388 the return address will not be corrupted by further pops. */
17389 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
17392 /* If we popped other registers then handle them here. */
17393 if (regs_available_for_popping)
17397 /* Work out which register currently contains the frame pointer. */
17398 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
17400 /* Move it into the correct place. */
17401 asm_fprintf (f, "\tmov\t%r, %r\n",
17402 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
17404 /* (Temporarily) remove it from the mask of popped registers. */
17405 regs_available_for_popping &= ~(1 << frame_pointer);
17406 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
17408 if (regs_available_for_popping)
17412 /* We popped the stack pointer as well,
17413 find the register that contains it. */
17414 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
17416 /* Move it into the stack register. */
17417 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
17419 /* At this point we have popped all necessary registers, so
17420 do not worry about restoring regs_available_for_popping
17421 to its correct value:
17423 assert (pops_needed == 0)
17424 assert (regs_available_for_popping == (1 << frame_pointer))
17425 assert (regs_to_pop == (1 << STACK_POINTER)) */
17429 /* Since we have just move the popped value into the frame
17430 pointer, the popping register is available for reuse, and
17431 we know that we still have the stack pointer left to pop. */
17432 regs_available_for_popping |= (1 << frame_pointer);
17436 /* If we still have registers left on the stack, but we no longer have
17437 any registers into which we can pop them, then we must move the return
17438 address into the link register and make available the register that
17440 if (regs_available_for_popping == 0 && pops_needed > 0)
17442 regs_available_for_popping |= 1 << reg_containing_return_addr;
17444 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
17445 reg_containing_return_addr);
17447 reg_containing_return_addr = LR_REGNUM;
17450 /* If we have registers left on the stack then pop some more.
17451 We know that at most we will want to pop FP and SP. */
17452 if (pops_needed > 0)
17457 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17458 regs_available_for_popping);
17460 /* We have popped either FP or SP.
17461 Move whichever one it is into the correct register. */
17462 popped_into = number_of_first_bit_set (regs_available_for_popping);
17463 move_to = number_of_first_bit_set (regs_to_pop);
17465 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
17467 regs_to_pop &= ~(1 << move_to);
17472 /* If we still have not popped everything then we must have only
17473 had one register available to us and we are now popping the SP. */
17474 if (pops_needed > 0)
17478 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
17479 regs_available_for_popping);
17481 popped_into = number_of_first_bit_set (regs_available_for_popping);
17483 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
17485 assert (regs_to_pop == (1 << STACK_POINTER))
17486 assert (pops_needed == 1)
17490 /* If necessary restore the a4 register. */
17493 if (reg_containing_return_addr != LR_REGNUM)
17495 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
17496 reg_containing_return_addr = LR_REGNUM;
17499 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
17502 if (crtl->calls_eh_return)
17503 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
17505 /* Return to caller. */
17506 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
17511 thumb1_final_prescan_insn (rtx insn)
17513 if (flag_print_asm_name)
17514 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
17515 INSN_ADDRESSES (INSN_UID (insn)));
17519 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
17521 unsigned HOST_WIDE_INT mask = 0xff;
17524 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
17525 if (val == 0) /* XXX */
17528 for (i = 0; i < 25; i++)
17529 if ((val & (mask << i)) == val)
17535 /* Returns nonzero if the current function contains,
17536 or might contain a far jump. */
17538 thumb_far_jump_used_p (void)
17542 /* This test is only important for leaf functions. */
17543 /* assert (!leaf_function_p ()); */
17545 /* If we have already decided that far jumps may be used,
17546 do not bother checking again, and always return true even if
17547 it turns out that they are not being used. Once we have made
17548 the decision that far jumps are present (and that hence the link
17549 register will be pushed onto the stack) we cannot go back on it. */
17550 if (cfun->machine->far_jump_used)
17553 /* If this function is not being called from the prologue/epilogue
17554 generation code then it must be being called from the
17555 INITIAL_ELIMINATION_OFFSET macro. */
17556 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
17558 /* In this case we know that we are being asked about the elimination
17559 of the arg pointer register. If that register is not being used,
17560 then there are no arguments on the stack, and we do not have to
17561 worry that a far jump might force the prologue to push the link
17562 register, changing the stack offsets. In this case we can just
17563 return false, since the presence of far jumps in the function will
17564 not affect stack offsets.
17566 If the arg pointer is live (or if it was live, but has now been
17567 eliminated and so set to dead) then we do have to test to see if
17568 the function might contain a far jump. This test can lead to some
17569 false negatives, since before reload is completed, then length of
17570 branch instructions is not known, so gcc defaults to returning their
17571 longest length, which in turn sets the far jump attribute to true.
17573 A false negative will not result in bad code being generated, but it
17574 will result in a needless push and pop of the link register. We
17575 hope that this does not occur too often.
17577 If we need doubleword stack alignment this could affect the other
17578 elimination offsets so we can't risk getting it wrong. */
17579 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
17580 cfun->machine->arg_pointer_live = 1;
17581 else if (!cfun->machine->arg_pointer_live)
17585 /* Check to see if the function contains a branch
17586 insn with the far jump attribute set. */
17587 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17589 if (GET_CODE (insn) == JUMP_INSN
17590 /* Ignore tablejump patterns. */
17591 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17592 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
17593 && get_attr_far_jump (insn) == FAR_JUMP_YES
17596 /* Record the fact that we have decided that
17597 the function does use far jumps. */
17598 cfun->machine->far_jump_used = 1;
17606 /* Return nonzero if FUNC must be entered in ARM mode. */
17608 is_called_in_ARM_mode (tree func)
17610 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
17612 /* Ignore the problem about functions whose address is taken. */
17613 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
17617 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
17623 /* The bits which aren't usefully expanded as rtl. */
17625 thumb_unexpanded_epilogue (void)
17627 arm_stack_offsets *offsets;
17629 unsigned long live_regs_mask = 0;
17630 int high_regs_pushed = 0;
17631 int had_to_push_lr;
17634 if (cfun->machine->return_used_this_function != 0)
17637 if (IS_NAKED (arm_current_func_type ()))
17640 offsets = arm_get_frame_offsets ();
17641 live_regs_mask = offsets->saved_regs_mask;
17642 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17644 /* If we can deduce the registers used from the function's return value.
17645 This is more reliable that examining df_regs_ever_live_p () because that
17646 will be set if the register is ever used in the function, not just if
17647 the register is used to hold a return value. */
17648 size = arm_size_return_regs ();
17650 /* The prolog may have pushed some high registers to use as
17651 work registers. e.g. the testsuite file:
17652 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
17653 compiles to produce:
17654 push {r4, r5, r6, r7, lr}
17658 as part of the prolog. We have to undo that pushing here. */
17660 if (high_regs_pushed)
17662 unsigned long mask = live_regs_mask & 0xff;
17665 /* The available low registers depend on the size of the value we are
17673 /* Oh dear! We have no low registers into which we can pop
17676 ("no low registers available for popping high registers");
17678 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
17679 if (live_regs_mask & (1 << next_hi_reg))
17682 while (high_regs_pushed)
17684 /* Find lo register(s) into which the high register(s) can
17686 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17688 if (mask & (1 << regno))
17689 high_regs_pushed--;
17690 if (high_regs_pushed == 0)
17694 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
17696 /* Pop the values into the low register(s). */
17697 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
17699 /* Move the value(s) into the high registers. */
17700 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
17702 if (mask & (1 << regno))
17704 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
17707 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
17708 if (live_regs_mask & (1 << next_hi_reg))
17713 live_regs_mask &= ~0x0f00;
17716 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
17717 live_regs_mask &= 0xff;
17719 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
17721 /* Pop the return address into the PC. */
17722 if (had_to_push_lr)
17723 live_regs_mask |= 1 << PC_REGNUM;
17725 /* Either no argument registers were pushed or a backtrace
17726 structure was created which includes an adjusted stack
17727 pointer, so just pop everything. */
17728 if (live_regs_mask)
17729 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17732 /* We have either just popped the return address into the
17733 PC or it is was kept in LR for the entire function. */
17734 if (!had_to_push_lr)
17735 thumb_exit (asm_out_file, LR_REGNUM);
17739 /* Pop everything but the return address. */
17740 if (live_regs_mask)
17741 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
17744 if (had_to_push_lr)
17748 /* We have no free low regs, so save one. */
17749 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
17753 /* Get the return address into a temporary register. */
17754 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
17755 1 << LAST_ARG_REGNUM);
17759 /* Move the return address to lr. */
17760 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
17762 /* Restore the low register. */
17763 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
17768 regno = LAST_ARG_REGNUM;
17773 /* Remove the argument registers that were pushed onto the stack. */
17774 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
17775 SP_REGNUM, SP_REGNUM,
17776 crtl->args.pretend_args_size);
17778 thumb_exit (asm_out_file, regno);
17784 /* Functions to save and restore machine-specific function data. */
17785 static struct machine_function *
17786 arm_init_machine_status (void)
17788 struct machine_function *machine;
17789 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
17791 #if ARM_FT_UNKNOWN != 0
17792 machine->func_type = ARM_FT_UNKNOWN;
17797 /* Return an RTX indicating where the return address to the
17798 calling function can be found. */
17800 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
17805 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
17808 /* Do anything needed before RTL is emitted for each function. */
17810 arm_init_expanders (void)
17812 /* Arrange to initialize and mark the machine per-function status. */
17813 init_machine_status = arm_init_machine_status;
17815 /* This is to stop the combine pass optimizing away the alignment
17816 adjustment of va_arg. */
17817 /* ??? It is claimed that this should not be necessary. */
17819 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
17823 /* Like arm_compute_initial_elimination offset. Simpler because there
17824 isn't an ABI specified frame pointer for Thumb. Instead, we set it
17825 to point at the base of the local variables after static stack
17826 space for a function has been allocated. */
17829 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17831 arm_stack_offsets *offsets;
17833 offsets = arm_get_frame_offsets ();
17837 case ARG_POINTER_REGNUM:
17840 case STACK_POINTER_REGNUM:
17841 return offsets->outgoing_args - offsets->saved_args;
17843 case FRAME_POINTER_REGNUM:
17844 return offsets->soft_frame - offsets->saved_args;
17846 case ARM_HARD_FRAME_POINTER_REGNUM:
17847 return offsets->saved_regs - offsets->saved_args;
17849 case THUMB_HARD_FRAME_POINTER_REGNUM:
17850 return offsets->locals_base - offsets->saved_args;
17853 gcc_unreachable ();
17857 case FRAME_POINTER_REGNUM:
17860 case STACK_POINTER_REGNUM:
17861 return offsets->outgoing_args - offsets->soft_frame;
17863 case ARM_HARD_FRAME_POINTER_REGNUM:
17864 return offsets->saved_regs - offsets->soft_frame;
17866 case THUMB_HARD_FRAME_POINTER_REGNUM:
17867 return offsets->locals_base - offsets->soft_frame;
17870 gcc_unreachable ();
17875 gcc_unreachable ();
17879 /* Generate the rest of a function's prologue. */
17881 thumb1_expand_prologue (void)
17885 HOST_WIDE_INT amount;
17886 arm_stack_offsets *offsets;
17887 unsigned long func_type;
17889 unsigned long live_regs_mask;
17891 func_type = arm_current_func_type ();
17893 /* Naked functions don't have prologues. */
17894 if (IS_NAKED (func_type))
17897 if (IS_INTERRUPT (func_type))
17899 error ("interrupt Service Routines cannot be coded in Thumb mode");
17903 offsets = arm_get_frame_offsets ();
17904 live_regs_mask = offsets->saved_regs_mask;
17905 /* Load the pic register before setting the frame pointer,
17906 so we can use r7 as a temporary work register. */
17907 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17908 arm_load_pic_register (live_regs_mask);
17910 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
17911 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
17912 stack_pointer_rtx);
17914 amount = offsets->outgoing_args - offsets->saved_regs;
17919 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17920 GEN_INT (- amount)));
17921 RTX_FRAME_RELATED_P (insn) = 1;
17927 /* The stack decrement is too big for an immediate value in a single
17928 insn. In theory we could issue multiple subtracts, but after
17929 three of them it becomes more space efficient to place the full
17930 value in the constant pool and load into a register. (Also the
17931 ARM debugger really likes to see only one stack decrement per
17932 function). So instead we look for a scratch register into which
17933 we can load the decrement, and then we subtract this from the
17934 stack pointer. Unfortunately on the thumb the only available
17935 scratch registers are the argument registers, and we cannot use
17936 these as they may hold arguments to the function. Instead we
17937 attempt to locate a call preserved register which is used by this
17938 function. If we can find one, then we know that it will have
17939 been pushed at the start of the prologue and so we can corrupt
17941 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
17942 if (live_regs_mask & (1 << regno))
17945 gcc_assert(regno <= LAST_LO_REGNUM);
17947 reg = gen_rtx_REG (SImode, regno);
17949 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
17951 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
17952 stack_pointer_rtx, reg));
17953 RTX_FRAME_RELATED_P (insn) = 1;
17954 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17955 plus_constant (stack_pointer_rtx,
17957 RTX_FRAME_RELATED_P (dwarf) = 1;
17958 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17962 if (frame_pointer_needed)
17963 thumb_set_frame_pointer (offsets);
17965 /* If we are profiling, make sure no instructions are scheduled before
17966 the call to mcount. Similarly if the user has requested no
17967 scheduling in the prolog. Similarly if we want non-call exceptions
17968 using the EABI unwinder, to prevent faulting instructions from being
17969 swapped with a stack adjustment. */
17970 if (crtl->profile || !TARGET_SCHED_PROLOG
17971 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17972 emit_insn (gen_blockage ());
17974 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17975 if (live_regs_mask & 0xff)
17976 cfun->machine->lr_save_eliminated = 0;
17981 thumb1_expand_epilogue (void)
17983 HOST_WIDE_INT amount;
17984 arm_stack_offsets *offsets;
17987 /* Naked functions don't have prologues. */
17988 if (IS_NAKED (arm_current_func_type ()))
17991 offsets = arm_get_frame_offsets ();
17992 amount = offsets->outgoing_args - offsets->saved_regs;
17994 if (frame_pointer_needed)
17996 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17997 amount = offsets->locals_base - offsets->saved_regs;
18000 gcc_assert (amount >= 0);
18004 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18005 GEN_INT (amount)));
18008 /* r3 is always free in the epilogue. */
18009 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
18011 emit_insn (gen_movsi (reg, GEN_INT (amount)));
18012 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
18016 /* Emit a USE (stack_pointer_rtx), so that
18017 the stack adjustment will not be deleted. */
18018 emit_insn (gen_prologue_use (stack_pointer_rtx));
18020 if (crtl->profile || !TARGET_SCHED_PROLOG)
18021 emit_insn (gen_blockage ());
18023 /* Emit a clobber for each insn that will be restored in the epilogue,
18024 so that flow2 will get register lifetimes correct. */
18025 for (regno = 0; regno < 13; regno++)
18026 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
18027 emit_clobber (gen_rtx_REG (SImode, regno));
18029 if (! df_regs_ever_live_p (LR_REGNUM))
18030 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
18034 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
18036 arm_stack_offsets *offsets;
18037 unsigned long live_regs_mask = 0;
18038 unsigned long l_mask;
18039 unsigned high_regs_pushed = 0;
18040 int cfa_offset = 0;
18043 if (IS_NAKED (arm_current_func_type ()))
18046 if (is_called_in_ARM_mode (current_function_decl))
18050 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
18051 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
18053 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
18055 /* Generate code sequence to switch us into Thumb mode. */
18056 /* The .code 32 directive has already been emitted by
18057 ASM_DECLARE_FUNCTION_NAME. */
18058 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
18059 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
18061 /* Generate a label, so that the debugger will notice the
18062 change in instruction sets. This label is also used by
18063 the assembler to bypass the ARM code when this function
18064 is called from a Thumb encoded function elsewhere in the
18065 same file. Hence the definition of STUB_NAME here must
18066 agree with the definition in gas/config/tc-arm.c. */
18068 #define STUB_NAME ".real_start_of"
18070 fprintf (f, "\t.code\t16\n");
18072 if (arm_dllexport_name_p (name))
18073 name = arm_strip_name_encoding (name);
18075 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
18076 fprintf (f, "\t.thumb_func\n");
18077 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
18080 if (crtl->args.pretend_args_size)
18082 /* Output unwind directive for the stack adjustment. */
18083 if (ARM_EABI_UNWIND_TABLES)
18084 fprintf (f, "\t.pad #%d\n",
18085 crtl->args.pretend_args_size);
18087 if (cfun->machine->uses_anonymous_args)
18091 fprintf (f, "\tpush\t{");
18093 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
18095 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
18096 regno <= LAST_ARG_REGNUM;
18098 asm_fprintf (f, "%r%s", regno,
18099 regno == LAST_ARG_REGNUM ? "" : ", ");
18101 fprintf (f, "}\n");
18104 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
18105 SP_REGNUM, SP_REGNUM,
18106 crtl->args.pretend_args_size);
18108 /* We don't need to record the stores for unwinding (would it
18109 help the debugger any if we did?), but record the change in
18110 the stack pointer. */
18111 if (dwarf2out_do_frame ())
18113 char *l = dwarf2out_cfi_label (false);
18115 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
18116 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
18120 /* Get the registers we are going to push. */
18121 offsets = arm_get_frame_offsets ();
18122 live_regs_mask = offsets->saved_regs_mask;
18123 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
18124 l_mask = live_regs_mask & 0x40ff;
18125 /* Then count how many other high registers will need to be pushed. */
18126 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
18128 if (TARGET_BACKTRACE)
18131 unsigned work_register;
18133 /* We have been asked to create a stack backtrace structure.
18134 The code looks like this:
18138 0 sub SP, #16 Reserve space for 4 registers.
18139 2 push {R7} Push low registers.
18140 4 add R7, SP, #20 Get the stack pointer before the push.
18141 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
18142 8 mov R7, PC Get hold of the start of this code plus 12.
18143 10 str R7, [SP, #16] Store it.
18144 12 mov R7, FP Get hold of the current frame pointer.
18145 14 str R7, [SP, #4] Store it.
18146 16 mov R7, LR Get hold of the current return address.
18147 18 str R7, [SP, #12] Store it.
18148 20 add R7, SP, #16 Point at the start of the backtrace structure.
18149 22 mov FP, R7 Put this value into the frame pointer. */
18151 work_register = thumb_find_work_register (live_regs_mask);
18153 if (ARM_EABI_UNWIND_TABLES)
18154 asm_fprintf (f, "\t.pad #16\n");
18157 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
18158 SP_REGNUM, SP_REGNUM);
18160 if (dwarf2out_do_frame ())
18162 char *l = dwarf2out_cfi_label (false);
18164 cfa_offset = cfa_offset + 16;
18165 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
18170 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
18171 offset = bit_count (l_mask) * UNITS_PER_WORD;
18176 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
18177 offset + 16 + crtl->args.pretend_args_size);
18179 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18182 /* Make sure that the instruction fetching the PC is in the right place
18183 to calculate "start of backtrace creation code + 12". */
18186 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
18187 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18189 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
18190 ARM_HARD_FRAME_POINTER_REGNUM);
18191 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18196 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
18197 ARM_HARD_FRAME_POINTER_REGNUM);
18198 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18200 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
18201 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18205 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
18206 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
18208 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
18210 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
18211 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
18213 /* Optimization: If we are not pushing any low registers but we are going
18214 to push some high registers then delay our first push. This will just
18215 be a push of LR and we can combine it with the push of the first high
18217 else if ((l_mask & 0xff) != 0
18218 || (high_regs_pushed == 0 && l_mask))
18219 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
18221 if (high_regs_pushed)
18223 unsigned pushable_regs;
18224 unsigned next_hi_reg;
18226 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
18227 if (live_regs_mask & (1 << next_hi_reg))
18230 pushable_regs = l_mask & 0xff;
18232 if (pushable_regs == 0)
18233 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
18235 while (high_regs_pushed > 0)
18237 unsigned long real_regs_mask = 0;
18239 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
18241 if (pushable_regs & (1 << regno))
18243 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
18245 high_regs_pushed --;
18246 real_regs_mask |= (1 << next_hi_reg);
18248 if (high_regs_pushed)
18250 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
18252 if (live_regs_mask & (1 << next_hi_reg))
18257 pushable_regs &= ~((1 << regno) - 1);
18263 /* If we had to find a work register and we have not yet
18264 saved the LR then add it to the list of regs to push. */
18265 if (l_mask == (1 << LR_REGNUM))
18267 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
18269 real_regs_mask | (1 << LR_REGNUM));
18273 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
18278 /* Handle the case of a double word load into a low register from
18279 a computed memory address. The computed address may involve a
18280 register which is overwritten by the load. */
18282 thumb_load_double_from_address (rtx *operands)
18290 gcc_assert (GET_CODE (operands[0]) == REG);
18291 gcc_assert (GET_CODE (operands[1]) == MEM);
18293 /* Get the memory address. */
18294 addr = XEXP (operands[1], 0);
18296 /* Work out how the memory address is computed. */
18297 switch (GET_CODE (addr))
18300 operands[2] = adjust_address (operands[1], SImode, 4);
18302 if (REGNO (operands[0]) == REGNO (addr))
18304 output_asm_insn ("ldr\t%H0, %2", operands);
18305 output_asm_insn ("ldr\t%0, %1", operands);
18309 output_asm_insn ("ldr\t%0, %1", operands);
18310 output_asm_insn ("ldr\t%H0, %2", operands);
18315 /* Compute <address> + 4 for the high order load. */
18316 operands[2] = adjust_address (operands[1], SImode, 4);
18318 output_asm_insn ("ldr\t%0, %1", operands);
18319 output_asm_insn ("ldr\t%H0, %2", operands);
18323 arg1 = XEXP (addr, 0);
18324 arg2 = XEXP (addr, 1);
18326 if (CONSTANT_P (arg1))
18327 base = arg2, offset = arg1;
18329 base = arg1, offset = arg2;
18331 gcc_assert (GET_CODE (base) == REG);
18333 /* Catch the case of <address> = <reg> + <reg> */
18334 if (GET_CODE (offset) == REG)
18336 int reg_offset = REGNO (offset);
18337 int reg_base = REGNO (base);
18338 int reg_dest = REGNO (operands[0]);
18340 /* Add the base and offset registers together into the
18341 higher destination register. */
18342 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
18343 reg_dest + 1, reg_base, reg_offset);
18345 /* Load the lower destination register from the address in
18346 the higher destination register. */
18347 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
18348 reg_dest, reg_dest + 1);
18350 /* Load the higher destination register from its own address
18352 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
18353 reg_dest + 1, reg_dest + 1);
18357 /* Compute <address> + 4 for the high order load. */
18358 operands[2] = adjust_address (operands[1], SImode, 4);
18360 /* If the computed address is held in the low order register
18361 then load the high order register first, otherwise always
18362 load the low order register first. */
18363 if (REGNO (operands[0]) == REGNO (base))
18365 output_asm_insn ("ldr\t%H0, %2", operands);
18366 output_asm_insn ("ldr\t%0, %1", operands);
18370 output_asm_insn ("ldr\t%0, %1", operands);
18371 output_asm_insn ("ldr\t%H0, %2", operands);
18377 /* With no registers to worry about we can just load the value
18379 operands[2] = adjust_address (operands[1], SImode, 4);
18381 output_asm_insn ("ldr\t%H0, %2", operands);
18382 output_asm_insn ("ldr\t%0, %1", operands);
18386 gcc_unreachable ();
18393 thumb_output_move_mem_multiple (int n, rtx *operands)
18400 if (REGNO (operands[4]) > REGNO (operands[5]))
18403 operands[4] = operands[5];
18406 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
18407 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
18411 if (REGNO (operands[4]) > REGNO (operands[5]))
18414 operands[4] = operands[5];
18417 if (REGNO (operands[5]) > REGNO (operands[6]))
18420 operands[5] = operands[6];
18423 if (REGNO (operands[4]) > REGNO (operands[5]))
18426 operands[4] = operands[5];
18430 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
18431 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
18435 gcc_unreachable ();
18441 /* Output a call-via instruction for thumb state. */
18443 thumb_call_via_reg (rtx reg)
18445 int regno = REGNO (reg);
18448 gcc_assert (regno < LR_REGNUM);
18450 /* If we are in the normal text section we can use a single instance
18451 per compilation unit. If we are doing function sections, then we need
18452 an entry per section, since we can't rely on reachability. */
18453 if (in_section == text_section)
18455 thumb_call_reg_needed = 1;
18457 if (thumb_call_via_label[regno] == NULL)
18458 thumb_call_via_label[regno] = gen_label_rtx ();
18459 labelp = thumb_call_via_label + regno;
18463 if (cfun->machine->call_via[regno] == NULL)
18464 cfun->machine->call_via[regno] = gen_label_rtx ();
18465 labelp = cfun->machine->call_via + regno;
18468 output_asm_insn ("bl\t%a0", labelp);
18472 /* Routines for generating rtl. */
18474 thumb_expand_movmemqi (rtx *operands)
18476 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
18477 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
18478 HOST_WIDE_INT len = INTVAL (operands[2]);
18479 HOST_WIDE_INT offset = 0;
18483 emit_insn (gen_movmem12b (out, in, out, in));
18489 emit_insn (gen_movmem8b (out, in, out, in));
18495 rtx reg = gen_reg_rtx (SImode);
18496 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
18497 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
18504 rtx reg = gen_reg_rtx (HImode);
18505 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
18506 plus_constant (in, offset))));
18507 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
18515 rtx reg = gen_reg_rtx (QImode);
18516 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
18517 plus_constant (in, offset))));
18518 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
18524 thumb_reload_out_hi (rtx *operands)
18526 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
18529 /* Handle reading a half-word from memory during reload. */
18531 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
18533 gcc_unreachable ();
18536 /* Return the length of a function name prefix
18537 that starts with the character 'c'. */
18539 arm_get_strip_length (int c)
18543 ARM_NAME_ENCODING_LENGTHS
18548 /* Return a pointer to a function's name with any
18549 and all prefix encodings stripped from it. */
18551 arm_strip_name_encoding (const char *name)
18555 while ((skip = arm_get_strip_length (* name)))
18561 /* If there is a '*' anywhere in the name's prefix, then
18562 emit the stripped name verbatim, otherwise prepend an
18563 underscore if leading underscores are being used. */
18565 arm_asm_output_labelref (FILE *stream, const char *name)
18570 while ((skip = arm_get_strip_length (* name)))
18572 verbatim |= (*name == '*');
18577 fputs (name, stream);
18579 asm_fprintf (stream, "%U%s", name);
18583 arm_file_start (void)
18587 if (TARGET_UNIFIED_ASM)
18588 asm_fprintf (asm_out_file, "\t.syntax unified\n");
18592 const char *fpu_name;
18593 if (arm_select[0].string)
18594 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
18595 else if (arm_select[1].string)
18596 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
18598 asm_fprintf (asm_out_file, "\t.cpu %s\n",
18599 all_cores[arm_default_cpu].name);
18601 if (TARGET_SOFT_FLOAT)
18604 fpu_name = "softvfp";
18606 fpu_name = "softfpa";
18610 int set_float_abi_attributes = 0;
18611 switch (arm_fpu_arch)
18616 case FPUTYPE_FPA_EMU2:
18619 case FPUTYPE_FPA_EMU3:
18622 case FPUTYPE_MAVERICK:
18623 fpu_name = "maverick";
18627 set_float_abi_attributes = 1;
18629 case FPUTYPE_VFP3D16:
18630 fpu_name = "vfpv3-d16";
18631 set_float_abi_attributes = 1;
18634 fpu_name = "vfpv3";
18635 set_float_abi_attributes = 1;
18639 set_float_abi_attributes = 1;
18641 case FPUTYPE_NEON_FP16:
18642 fpu_name = "neon-fp16";
18643 set_float_abi_attributes = 1;
18648 if (set_float_abi_attributes)
18650 if (TARGET_HARD_FLOAT)
18651 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
18652 if (TARGET_HARD_FLOAT_ABI)
18653 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
18656 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
18658 /* Some of these attributes only apply when the corresponding features
18659 are used. However we don't have any easy way of figuring this out.
18660 Conservatively record the setting that would have been used. */
18662 /* Tag_ABI_FP_rounding. */
18663 if (flag_rounding_math)
18664 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
18665 if (!flag_unsafe_math_optimizations)
18667 /* Tag_ABI_FP_denomal. */
18668 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
18669 /* Tag_ABI_FP_exceptions. */
18670 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
18672 /* Tag_ABI_FP_user_exceptions. */
18673 if (flag_signaling_nans)
18674 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
18675 /* Tag_ABI_FP_number_model. */
18676 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
18677 flag_finite_math_only ? 1 : 3);
18679 /* Tag_ABI_align8_needed. */
18680 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
18681 /* Tag_ABI_align8_preserved. */
18682 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
18683 /* Tag_ABI_enum_size. */
18684 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
18685 flag_short_enums ? 1 : 2);
18687 /* Tag_ABI_optimization_goals. */
18690 else if (optimize >= 2)
18696 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
18698 /* Tag_ABI_FP_16bit_format. */
18699 if (arm_fp16_format)
18700 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
18701 (int)arm_fp16_format);
18703 if (arm_lang_output_object_attributes_hook)
18704 arm_lang_output_object_attributes_hook();
18706 default_file_start();
18710 arm_file_end (void)
18714 if (NEED_INDICATE_EXEC_STACK)
18715 /* Add .note.GNU-stack. */
18716 file_end_indicate_exec_stack ();
18718 if (! thumb_call_reg_needed)
18721 switch_to_section (text_section);
18722 asm_fprintf (asm_out_file, "\t.code 16\n");
18723 ASM_OUTPUT_ALIGN (asm_out_file, 1);
18725 for (regno = 0; regno < LR_REGNUM; regno++)
18727 rtx label = thumb_call_via_label[regno];
18731 targetm.asm_out.internal_label (asm_out_file, "L",
18732 CODE_LABEL_NUMBER (label));
18733 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
18739 /* Symbols in the text segment can be accessed without indirecting via the
18740 constant pool; it may take an extra binary operation, but this is still
18741 faster than indirecting via memory. Don't do this when not optimizing,
18742 since we won't be calculating al of the offsets necessary to do this
18746 arm_encode_section_info (tree decl, rtx rtl, int first)
18748 if (optimize > 0 && TREE_CONSTANT (decl))
18749 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
18751 default_encode_section_info (decl, rtl, first);
18753 #endif /* !ARM_PE */
18756 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
18758 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
18759 && !strcmp (prefix, "L"))
18761 arm_ccfsm_state = 0;
18762 arm_target_insn = NULL;
18764 default_internal_label (stream, prefix, labelno);
18767 /* Output code to add DELTA to the first argument, and then jump
18768 to FUNCTION. Used for C++ multiple inheritance. */
18770 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
18771 HOST_WIDE_INT delta,
18772 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
18775 static int thunk_label = 0;
18778 int mi_delta = delta;
18779 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
18781 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
18784 mi_delta = - mi_delta;
18788 int labelno = thunk_label++;
18789 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
18790 /* Thunks are entered in arm mode when avaiable. */
18791 if (TARGET_THUMB1_ONLY)
18793 /* push r3 so we can use it as a temporary. */
18794 /* TODO: Omit this save if r3 is not used. */
18795 fputs ("\tpush {r3}\n", file);
18796 fputs ("\tldr\tr3, ", file);
18800 fputs ("\tldr\tr12, ", file);
18802 assemble_name (file, label);
18803 fputc ('\n', file);
18806 /* If we are generating PIC, the ldr instruction below loads
18807 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
18808 the address of the add + 8, so we have:
18810 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
18813 Note that we have "+ 1" because some versions of GNU ld
18814 don't set the low bit of the result for R_ARM_REL32
18815 relocations against thumb function symbols.
18816 On ARMv6M this is +4, not +8. */
18817 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
18818 assemble_name (file, labelpc);
18819 fputs (":\n", file);
18820 if (TARGET_THUMB1_ONLY)
18822 /* This is 2 insns after the start of the thunk, so we know it
18823 is 4-byte aligned. */
18824 fputs ("\tadd\tr3, pc, r3\n", file);
18825 fputs ("\tmov r12, r3\n", file);
18828 fputs ("\tadd\tr12, pc, r12\n", file);
18830 else if (TARGET_THUMB1_ONLY)
18831 fputs ("\tmov r12, r3\n", file);
18833 if (TARGET_THUMB1_ONLY)
18835 if (mi_delta > 255)
18837 fputs ("\tldr\tr3, ", file);
18838 assemble_name (file, label);
18839 fputs ("+4\n", file);
18840 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
18841 mi_op, this_regno, this_regno);
18843 else if (mi_delta != 0)
18845 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18846 mi_op, this_regno, this_regno,
18852 /* TODO: Use movw/movt for large constants when available. */
18853 while (mi_delta != 0)
18855 if ((mi_delta & (3 << shift)) == 0)
18859 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
18860 mi_op, this_regno, this_regno,
18861 mi_delta & (0xff << shift));
18862 mi_delta &= ~(0xff << shift);
18869 if (TARGET_THUMB1_ONLY)
18870 fputs ("\tpop\t{r3}\n", file);
18872 fprintf (file, "\tbx\tr12\n");
18873 ASM_OUTPUT_ALIGN (file, 2);
18874 assemble_name (file, label);
18875 fputs (":\n", file);
18878 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
18879 rtx tem = XEXP (DECL_RTL (function), 0);
18880 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
18881 tem = gen_rtx_MINUS (GET_MODE (tem),
18883 gen_rtx_SYMBOL_REF (Pmode,
18884 ggc_strdup (labelpc)));
18885 assemble_integer (tem, 4, BITS_PER_WORD, 1);
18888 /* Output ".word .LTHUNKn". */
18889 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
18891 if (TARGET_THUMB1_ONLY && mi_delta > 255)
18892 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
18896 fputs ("\tb\t", file);
18897 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
18898 if (NEED_PLT_RELOC)
18899 fputs ("(PLT)", file);
18900 fputc ('\n', file);
18905 arm_emit_vector_const (FILE *file, rtx x)
18908 const char * pattern;
18910 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18912 switch (GET_MODE (x))
18914 case V2SImode: pattern = "%08x"; break;
18915 case V4HImode: pattern = "%04x"; break;
18916 case V8QImode: pattern = "%02x"; break;
18917 default: gcc_unreachable ();
18920 fprintf (file, "0x");
18921 for (i = CONST_VECTOR_NUNITS (x); i--;)
18925 element = CONST_VECTOR_ELT (x, i);
18926 fprintf (file, pattern, INTVAL (element));
18932 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
18933 HFmode constant pool entries are actually loaded with ldr. */
18935 arm_emit_fp16_const (rtx c)
18940 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
18941 bits = real_to_target (NULL, &r, HFmode);
18942 if (WORDS_BIG_ENDIAN)
18943 assemble_zeros (2);
18944 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
18945 if (!WORDS_BIG_ENDIAN)
18946 assemble_zeros (2);
18950 arm_output_load_gr (rtx *operands)
18957 if (GET_CODE (operands [1]) != MEM
18958 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
18959 || GET_CODE (reg = XEXP (sum, 0)) != REG
18960 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
18961 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
18962 return "wldrw%?\t%0, %1";
18964 /* Fix up an out-of-range load of a GR register. */
18965 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
18966 wcgr = operands[0];
18968 output_asm_insn ("ldr%?\t%0, %1", operands);
18970 operands[0] = wcgr;
18972 output_asm_insn ("tmcr%?\t%0, %1", operands);
18973 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
18978 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18980 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18981 named arg and all anonymous args onto the stack.
18982 XXX I know the prologue shouldn't be pushing registers, but it is faster
18986 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
18987 enum machine_mode mode,
18990 int second_time ATTRIBUTE_UNUSED)
18992 int nregs = cum->nregs;
18994 && ARM_DOUBLEWORD_ALIGN
18995 && arm_needs_doubleword_align (mode, type))
18998 cfun->machine->uses_anonymous_args = 1;
18999 if (nregs < NUM_ARG_REGS)
19000 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
19003 /* Return nonzero if the CONSUMER instruction (a store) does not need
19004 PRODUCER's value to calculate the address. */
19007 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
19009 rtx value = PATTERN (producer);
19010 rtx addr = PATTERN (consumer);
19012 if (GET_CODE (value) == COND_EXEC)
19013 value = COND_EXEC_CODE (value);
19014 if (GET_CODE (value) == PARALLEL)
19015 value = XVECEXP (value, 0, 0);
19016 value = XEXP (value, 0);
19017 if (GET_CODE (addr) == COND_EXEC)
19018 addr = COND_EXEC_CODE (addr);
19019 if (GET_CODE (addr) == PARALLEL)
19020 addr = XVECEXP (addr, 0, 0);
19021 addr = XEXP (addr, 0);
19023 return !reg_overlap_mentioned_p (value, addr);
19026 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
19027 have an early register shift value or amount dependency on the
19028 result of PRODUCER. */
19031 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
19033 rtx value = PATTERN (producer);
19034 rtx op = PATTERN (consumer);
19037 if (GET_CODE (value) == COND_EXEC)
19038 value = COND_EXEC_CODE (value);
19039 if (GET_CODE (value) == PARALLEL)
19040 value = XVECEXP (value, 0, 0);
19041 value = XEXP (value, 0);
19042 if (GET_CODE (op) == COND_EXEC)
19043 op = COND_EXEC_CODE (op);
19044 if (GET_CODE (op) == PARALLEL)
19045 op = XVECEXP (op, 0, 0);
19048 early_op = XEXP (op, 0);
19049 /* This is either an actual independent shift, or a shift applied to
19050 the first operand of another operation. We want the whole shift
19052 if (GET_CODE (early_op) == REG)
19055 return !reg_overlap_mentioned_p (value, early_op);
19058 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
19059 have an early register shift value dependency on the result of
19063 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
19065 rtx value = PATTERN (producer);
19066 rtx op = PATTERN (consumer);
19069 if (GET_CODE (value) == COND_EXEC)
19070 value = COND_EXEC_CODE (value);
19071 if (GET_CODE (value) == PARALLEL)
19072 value = XVECEXP (value, 0, 0);
19073 value = XEXP (value, 0);
19074 if (GET_CODE (op) == COND_EXEC)
19075 op = COND_EXEC_CODE (op);
19076 if (GET_CODE (op) == PARALLEL)
19077 op = XVECEXP (op, 0, 0);
19080 early_op = XEXP (op, 0);
19082 /* This is either an actual independent shift, or a shift applied to
19083 the first operand of another operation. We want the value being
19084 shifted, in either case. */
19085 if (GET_CODE (early_op) != REG)
19086 early_op = XEXP (early_op, 0);
19088 return !reg_overlap_mentioned_p (value, early_op);
19091 /* Return nonzero if the CONSUMER (a mul or mac op) does not
19092 have an early register mult dependency on the result of
19096 arm_no_early_mul_dep (rtx producer, rtx consumer)
19098 rtx value = PATTERN (producer);
19099 rtx op = PATTERN (consumer);
19101 if (GET_CODE (value) == COND_EXEC)
19102 value = COND_EXEC_CODE (value);
19103 if (GET_CODE (value) == PARALLEL)
19104 value = XVECEXP (value, 0, 0);
19105 value = XEXP (value, 0);
19106 if (GET_CODE (op) == COND_EXEC)
19107 op = COND_EXEC_CODE (op);
19108 if (GET_CODE (op) == PARALLEL)
19109 op = XVECEXP (op, 0, 0);
19112 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
19114 if (GET_CODE (XEXP (op, 0)) == MULT)
19115 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
19117 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
19123 /* We can't rely on the caller doing the proper promotion when
19124 using APCS or ATPCS. */
19127 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
19129 return !TARGET_AAPCS_BASED;
19133 /* AAPCS based ABIs use short enums by default. */
19136 arm_default_short_enums (void)
19138 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
19142 /* AAPCS requires that anonymous bitfields affect structure alignment. */
19145 arm_align_anon_bitfield (void)
19147 return TARGET_AAPCS_BASED;
19151 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
19154 arm_cxx_guard_type (void)
19156 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
19159 /* Return non-zero if the consumer (a multiply-accumulate instruction)
19160 has an accumulator dependency on the result of the producer (a
19161 multiplication instruction) and no other dependency on that result. */
19163 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
19165 rtx mul = PATTERN (producer);
19166 rtx mac = PATTERN (consumer);
19168 rtx mac_op0, mac_op1, mac_acc;
19170 if (GET_CODE (mul) == COND_EXEC)
19171 mul = COND_EXEC_CODE (mul);
19172 if (GET_CODE (mac) == COND_EXEC)
19173 mac = COND_EXEC_CODE (mac);
19175 /* Check that mul is of the form (set (...) (mult ...))
19176 and mla is of the form (set (...) (plus (mult ...) (...))). */
19177 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
19178 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
19179 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
19182 mul_result = XEXP (mul, 0);
19183 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
19184 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
19185 mac_acc = XEXP (XEXP (mac, 1), 1);
19187 return (reg_overlap_mentioned_p (mul_result, mac_acc)
19188 && !reg_overlap_mentioned_p (mul_result, mac_op0)
19189 && !reg_overlap_mentioned_p (mul_result, mac_op1));
19193 /* The EABI says test the least significant bit of a guard variable. */
19196 arm_cxx_guard_mask_bit (void)
19198 return TARGET_AAPCS_BASED;
19202 /* The EABI specifies that all array cookies are 8 bytes long. */
19205 arm_get_cookie_size (tree type)
19209 if (!TARGET_AAPCS_BASED)
19210 return default_cxx_get_cookie_size (type);
19212 size = build_int_cst (sizetype, 8);
19217 /* The EABI says that array cookies should also contain the element size. */
19220 arm_cookie_has_size (void)
19222 return TARGET_AAPCS_BASED;
19226 /* The EABI says constructors and destructors should return a pointer to
19227 the object constructed/destroyed. */
19230 arm_cxx_cdtor_returns_this (void)
19232 return TARGET_AAPCS_BASED;
19235 /* The EABI says that an inline function may never be the key
19239 arm_cxx_key_method_may_be_inline (void)
19241 return !TARGET_AAPCS_BASED;
19245 arm_cxx_determine_class_data_visibility (tree decl)
19247 if (!TARGET_AAPCS_BASED
19248 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
19251 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
19252 is exported. However, on systems without dynamic vague linkage,
19253 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
19254 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
19255 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
19257 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
19258 DECL_VISIBILITY_SPECIFIED (decl) = 1;
19262 arm_cxx_class_data_always_comdat (void)
19264 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
19265 vague linkage if the class has no key function. */
19266 return !TARGET_AAPCS_BASED;
19270 /* The EABI says __aeabi_atexit should be used to register static
19274 arm_cxx_use_aeabi_atexit (void)
19276 return TARGET_AAPCS_BASED;
19281 arm_set_return_address (rtx source, rtx scratch)
19283 arm_stack_offsets *offsets;
19284 HOST_WIDE_INT delta;
19286 unsigned long saved_regs;
19288 offsets = arm_get_frame_offsets ();
19289 saved_regs = offsets->saved_regs_mask;
19291 if ((saved_regs & (1 << LR_REGNUM)) == 0)
19292 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19295 if (frame_pointer_needed)
19296 addr = plus_constant(hard_frame_pointer_rtx, -4);
19299 /* LR will be the first saved register. */
19300 delta = offsets->outgoing_args - (offsets->frame + 4);
19305 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
19306 GEN_INT (delta & ~4095)));
19311 addr = stack_pointer_rtx;
19313 addr = plus_constant (addr, delta);
19315 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19321 thumb_set_return_address (rtx source, rtx scratch)
19323 arm_stack_offsets *offsets;
19324 HOST_WIDE_INT delta;
19325 HOST_WIDE_INT limit;
19328 unsigned long mask;
19332 offsets = arm_get_frame_offsets ();
19333 mask = offsets->saved_regs_mask;
19334 if (mask & (1 << LR_REGNUM))
19337 /* Find the saved regs. */
19338 if (frame_pointer_needed)
19340 delta = offsets->soft_frame - offsets->saved_args;
19341 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
19347 delta = offsets->outgoing_args - offsets->saved_args;
19350 /* Allow for the stack frame. */
19351 if (TARGET_THUMB1 && TARGET_BACKTRACE)
19353 /* The link register is always the first saved register. */
19356 /* Construct the address. */
19357 addr = gen_rtx_REG (SImode, reg);
19360 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
19361 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
19365 addr = plus_constant (addr, delta);
19367 emit_move_insn (gen_frame_mem (Pmode, addr), source);
19370 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
19373 /* Implements target hook vector_mode_supported_p. */
19375 arm_vector_mode_supported_p (enum machine_mode mode)
19377 /* Neon also supports V2SImode, etc. listed in the clause below. */
19378 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
19379 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
19382 if ((mode == V2SImode)
19383 || (mode == V4HImode)
19384 || (mode == V8QImode))
19390 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
19391 ARM insns and therefore guarantee that the shift count is modulo 256.
19392 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
19393 guarantee no particular behavior for out-of-range counts. */
19395 static unsigned HOST_WIDE_INT
19396 arm_shift_truncation_mask (enum machine_mode mode)
19398 return mode == SImode ? 255 : 0;
19402 /* Map internal gcc register numbers to DWARF2 register numbers. */
19405 arm_dbx_register_number (unsigned int regno)
19410 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
19411 compatibility. The EABI defines them as registers 96-103. */
19412 if (IS_FPA_REGNUM (regno))
19413 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
19415 /* FIXME: VFPv3 register numbering. */
19416 if (IS_VFP_REGNUM (regno))
19417 return 64 + regno - FIRST_VFP_REGNUM;
19419 if (IS_IWMMXT_GR_REGNUM (regno))
19420 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
19422 if (IS_IWMMXT_REGNUM (regno))
19423 return 112 + regno - FIRST_IWMMXT_REGNUM;
19425 gcc_unreachable ();
19429 #ifdef TARGET_UNWIND_INFO
19430 /* Emit unwind directives for a store-multiple instruction or stack pointer
19431 push during alignment.
19432 These should only ever be generated by the function prologue code, so
19433 expect them to have a particular form. */
19436 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
19439 HOST_WIDE_INT offset;
19440 HOST_WIDE_INT nregs;
19446 e = XVECEXP (p, 0, 0);
19447 if (GET_CODE (e) != SET)
19450 /* First insn will adjust the stack pointer. */
19451 if (GET_CODE (e) != SET
19452 || GET_CODE (XEXP (e, 0)) != REG
19453 || REGNO (XEXP (e, 0)) != SP_REGNUM
19454 || GET_CODE (XEXP (e, 1)) != PLUS)
19457 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
19458 nregs = XVECLEN (p, 0) - 1;
19460 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
19463 /* The function prologue may also push pc, but not annotate it as it is
19464 never restored. We turn this into a stack pointer adjustment. */
19465 if (nregs * 4 == offset - 4)
19467 fprintf (asm_out_file, "\t.pad #4\n");
19471 fprintf (asm_out_file, "\t.save {");
19473 else if (IS_VFP_REGNUM (reg))
19476 fprintf (asm_out_file, "\t.vsave {");
19478 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
19480 /* FPA registers are done differently. */
19481 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
19485 /* Unknown register type. */
19488 /* If the stack increment doesn't match the size of the saved registers,
19489 something has gone horribly wrong. */
19490 if (offset != nregs * reg_size)
19495 /* The remaining insns will describe the stores. */
19496 for (i = 1; i <= nregs; i++)
19498 /* Expect (set (mem <addr>) (reg)).
19499 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
19500 e = XVECEXP (p, 0, i);
19501 if (GET_CODE (e) != SET
19502 || GET_CODE (XEXP (e, 0)) != MEM
19503 || GET_CODE (XEXP (e, 1)) != REG)
19506 reg = REGNO (XEXP (e, 1));
19511 fprintf (asm_out_file, ", ");
19512 /* We can't use %r for vfp because we need to use the
19513 double precision register names. */
19514 if (IS_VFP_REGNUM (reg))
19515 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
19517 asm_fprintf (asm_out_file, "%r", reg);
19519 #ifdef ENABLE_CHECKING
19520 /* Check that the addresses are consecutive. */
19521 e = XEXP (XEXP (e, 0), 0);
19522 if (GET_CODE (e) == PLUS)
19524 offset += reg_size;
19525 if (GET_CODE (XEXP (e, 0)) != REG
19526 || REGNO (XEXP (e, 0)) != SP_REGNUM
19527 || GET_CODE (XEXP (e, 1)) != CONST_INT
19528 || offset != INTVAL (XEXP (e, 1)))
19532 || GET_CODE (e) != REG
19533 || REGNO (e) != SP_REGNUM)
19537 fprintf (asm_out_file, "}\n");
19540 /* Emit unwind directives for a SET. */
19543 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
19551 switch (GET_CODE (e0))
19554 /* Pushing a single register. */
19555 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
19556 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
19557 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
19560 asm_fprintf (asm_out_file, "\t.save ");
19561 if (IS_VFP_REGNUM (REGNO (e1)))
19562 asm_fprintf(asm_out_file, "{d%d}\n",
19563 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
19565 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
19569 if (REGNO (e0) == SP_REGNUM)
19571 /* A stack increment. */
19572 if (GET_CODE (e1) != PLUS
19573 || GET_CODE (XEXP (e1, 0)) != REG
19574 || REGNO (XEXP (e1, 0)) != SP_REGNUM
19575 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19578 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
19579 -INTVAL (XEXP (e1, 1)));
19581 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
19583 HOST_WIDE_INT offset;
19585 if (GET_CODE (e1) == PLUS)
19587 if (GET_CODE (XEXP (e1, 0)) != REG
19588 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
19590 reg = REGNO (XEXP (e1, 0));
19591 offset = INTVAL (XEXP (e1, 1));
19592 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
19593 HARD_FRAME_POINTER_REGNUM, reg,
19594 INTVAL (XEXP (e1, 1)));
19596 else if (GET_CODE (e1) == REG)
19599 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
19600 HARD_FRAME_POINTER_REGNUM, reg);
19605 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
19607 /* Move from sp to reg. */
19608 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
19610 else if (GET_CODE (e1) == PLUS
19611 && GET_CODE (XEXP (e1, 0)) == REG
19612 && REGNO (XEXP (e1, 0)) == SP_REGNUM
19613 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
19615 /* Set reg to offset from sp. */
19616 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
19617 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
19619 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
19621 /* Stack pointer save before alignment. */
19623 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
19636 /* Emit unwind directives for the given insn. */
19639 arm_unwind_emit (FILE * asm_out_file, rtx insn)
19643 if (!ARM_EABI_UNWIND_TABLES)
19646 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19647 && (TREE_NOTHROW (current_function_decl)
19648 || crtl->all_throwers_are_sibcalls))
19651 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
19654 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
19656 pat = XEXP (pat, 0);
19658 pat = PATTERN (insn);
19660 switch (GET_CODE (pat))
19663 arm_unwind_emit_set (asm_out_file, pat);
19667 /* Store multiple. */
19668 arm_unwind_emit_sequence (asm_out_file, pat);
19677 /* Output a reference from a function exception table to the type_info
19678 object X. The EABI specifies that the symbol should be relocated by
19679 an R_ARM_TARGET2 relocation. */
19682 arm_output_ttype (rtx x)
19684 fputs ("\t.word\t", asm_out_file);
19685 output_addr_const (asm_out_file, x);
19686 /* Use special relocations for symbol references. */
19687 if (GET_CODE (x) != CONST_INT)
19688 fputs ("(TARGET2)", asm_out_file);
19689 fputc ('\n', asm_out_file);
19693 #endif /* TARGET_UNWIND_INFO */
19696 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
19697 stack alignment. */
19700 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
19702 rtx unspec = SET_SRC (pattern);
19703 gcc_assert (GET_CODE (unspec) == UNSPEC);
19707 case UNSPEC_STACK_ALIGN:
19708 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
19709 put anything on the stack, so hopefully it won't matter.
19710 CFA = SP will be correct after alignment. */
19711 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
19712 SET_DEST (pattern));
19715 gcc_unreachable ();
19720 /* Output unwind directives for the start/end of a function. */
19723 arm_output_fn_unwind (FILE * f, bool prologue)
19725 if (!ARM_EABI_UNWIND_TABLES)
19729 fputs ("\t.fnstart\n", f);
19732 /* If this function will never be unwound, then mark it as such.
19733 The came condition is used in arm_unwind_emit to suppress
19734 the frame annotations. */
19735 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
19736 && (TREE_NOTHROW (current_function_decl)
19737 || crtl->all_throwers_are_sibcalls))
19738 fputs("\t.cantunwind\n", f);
19740 fputs ("\t.fnend\n", f);
19745 arm_emit_tls_decoration (FILE *fp, rtx x)
19747 enum tls_reloc reloc;
19750 val = XVECEXP (x, 0, 0);
19751 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
19753 output_addr_const (fp, val);
19758 fputs ("(tlsgd)", fp);
19761 fputs ("(tlsldm)", fp);
19764 fputs ("(tlsldo)", fp);
19767 fputs ("(gottpoff)", fp);
19770 fputs ("(tpoff)", fp);
19773 gcc_unreachable ();
19781 fputs (" + (. - ", fp);
19782 output_addr_const (fp, XVECEXP (x, 0, 2));
19784 output_addr_const (fp, XVECEXP (x, 0, 3));
19794 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
19797 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
19799 gcc_assert (size == 4);
19800 fputs ("\t.word\t", file);
19801 output_addr_const (file, x);
19802 fputs ("(tlsldo)", file);
19806 arm_output_addr_const_extra (FILE *fp, rtx x)
19808 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
19809 return arm_emit_tls_decoration (fp, x);
19810 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
19813 int labelno = INTVAL (XVECEXP (x, 0, 0));
19815 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
19816 assemble_name_raw (fp, label);
19820 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
19822 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
19826 output_addr_const (fp, XVECEXP (x, 0, 0));
19830 else if (GET_CODE (x) == CONST_VECTOR)
19831 return arm_emit_vector_const (fp, x);
19836 /* Output assembly for a shift instruction.
19837 SET_FLAGS determines how the instruction modifies the condition codes.
19838 0 - Do not set condition codes.
19839 1 - Set condition codes.
19840 2 - Use smallest instruction. */
19842 arm_output_shift(rtx * operands, int set_flags)
19845 static const char flag_chars[3] = {'?', '.', '!'};
19850 c = flag_chars[set_flags];
19851 if (TARGET_UNIFIED_ASM)
19853 shift = shift_op(operands[3], &val);
19857 operands[2] = GEN_INT(val);
19858 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
19861 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
19864 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
19865 output_asm_insn (pattern, operands);
19869 /* Output a Thumb-1 casesi dispatch sequence. */
19871 thumb1_output_casesi (rtx *operands)
19873 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
19874 addr_diff_vec_flags flags;
19876 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
19878 flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
19880 switch (GET_MODE(diff_vec))
19883 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
19884 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
19886 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
19887 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
19889 return "bl\t%___gnu_thumb1_case_si";
19891 gcc_unreachable ();
19895 /* Output a Thumb-2 casesi instruction. */
19897 thumb2_output_casesi (rtx *operands)
19899 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
19901 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
19903 output_asm_insn ("cmp\t%0, %1", operands);
19904 output_asm_insn ("bhi\t%l3", operands);
19905 switch (GET_MODE(diff_vec))
19908 return "tbb\t[%|pc, %0]";
19910 return "tbh\t[%|pc, %0, lsl #1]";
19914 output_asm_insn ("adr\t%4, %l2", operands);
19915 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
19916 output_asm_insn ("add\t%4, %4, %5", operands);
19921 output_asm_insn ("adr\t%4, %l2", operands);
19922 return "ldr\t%|pc, [%4, %0, lsl #2]";
19925 gcc_unreachable ();
19929 /* Most ARM cores are single issue, but some newer ones can dual issue.
19930 The scheduler descriptions rely on this being correct. */
19932 arm_issue_rate (void)
19947 /* A table and a function to perform ARM-specific name mangling for
19948 NEON vector types in order to conform to the AAPCS (see "Procedure
19949 Call Standard for the ARM Architecture", Appendix A). To qualify
19950 for emission with the mangled names defined in that document, a
19951 vector type must not only be of the correct mode but also be
19952 composed of NEON vector element types (e.g. __builtin_neon_qi). */
19955 enum machine_mode mode;
19956 const char *element_type_name;
19957 const char *aapcs_name;
19958 } arm_mangle_map_entry;
19960 static arm_mangle_map_entry arm_mangle_map[] = {
19961 /* 64-bit containerized types. */
19962 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
19963 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
19964 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
19965 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
19966 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
19967 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
19968 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
19969 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
19970 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
19971 /* 128-bit containerized types. */
19972 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
19973 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
19974 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
19975 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
19976 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
19977 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
19978 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
19979 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
19980 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
19981 { VOIDmode, NULL, NULL }
19985 arm_mangle_type (const_tree type)
19987 arm_mangle_map_entry *pos = arm_mangle_map;
19989 /* The ARM ABI documents (10th October 2008) say that "__va_list"
19990 has to be managled as if it is in the "std" namespace. */
19991 if (TARGET_AAPCS_BASED
19992 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
19994 static bool warned;
19995 if (!warned && warn_psabi)
19998 inform (input_location,
19999 "the mangling of %<va_list%> has changed in GCC 4.4");
20001 return "St9__va_list";
20004 /* Half-precision float. */
20005 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
20008 if (TREE_CODE (type) != VECTOR_TYPE)
20011 /* Check the mode of the vector type, and the name of the vector
20012 element type, against the table. */
20013 while (pos->mode != VOIDmode)
20015 tree elt_type = TREE_TYPE (type);
20017 if (pos->mode == TYPE_MODE (type)
20018 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
20019 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
20020 pos->element_type_name))
20021 return pos->aapcs_name;
20026 /* Use the default mangling for unrecognized (possibly user-defined)
20031 /* Order of allocation of core registers for Thumb: this allocation is
20032 written over the corresponding initial entries of the array
20033 initialized with REG_ALLOC_ORDER. We allocate all low registers
20034 first. Saving and restoring a low register is usually cheaper than
20035 using a call-clobbered high register. */
20037 static const int thumb_core_reg_alloc_order[] =
20039 3, 2, 1, 0, 4, 5, 6, 7,
20040 14, 12, 8, 9, 10, 11, 13, 15
20043 /* Adjust register allocation order when compiling for Thumb. */
20046 arm_order_regs_for_local_alloc (void)
20048 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
20049 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
20051 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
20052 sizeof (thumb_core_reg_alloc_order));
20055 /* Set default optimization options. */
20057 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
20059 /* Enable section anchors by default at -O1 or higher.
20060 Use 2 to distinguish from an explicit -fsection-anchors
20061 given on the command line. */
20063 flag_section_anchors = 2;
20066 #include "gt-arm.h"