/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif

static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
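/* Illustrative sketch (not part of the original source): how user code
   would apply the attributes registered above.  The declarations are
   hypothetical.  */
#if 0
void far_away_func (void) __attribute__ ((long_call));
void fast_handler (void) __attribute__ ((interrupt ("IRQ")));
void reset_entry (void) __attribute__ ((naked));
#endif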
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
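/* Worked example (added for clarity, not in the original source): the
   limits above let one anchor serve offsets in [-4088, 4095], a span of
   4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, so successive
   anchors remain naturally eight-byte aligned.  */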
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate
struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)
#define FL_FOR_ARCH2    FL_NOTM
#define FL_FOR_ARCH3    (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M   (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4    (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T   (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5    (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T   (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E   (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE  (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
#define FL_FOR_ARCH6    (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J   FL_FOR_ARCH6
#define FL_FOR_ARCH6K   (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z   FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK  FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2  (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M   (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7    (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A   (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R   (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M   (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
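/* Worked example (added for clarity, not in the original source): the
   FL_FOR_ARCH* values nest, so expanding FL_FOR_ARCH5TE gives

     FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5
     | FL_ARCH5E | FL_THUMB

   i.e. every capability of the earlier architecture revisions plus the
   v5E DSP extensions and Thumb awareness.  */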
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;
/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  3
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  1
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  2
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  1
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
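/* Illustrative expansion (not part of the original source): a hypothetical
   arm-cores.def line such as

     ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)

   would expand via the ARM_CORE macro above to

     {"arm926ej-s", arm926ejs, "5TEJ", FL_LDSCHED | FL_FOR_ARCH5TEJ,
      &arm_9e_tune},

   stringizing the architecture name and pasting in the per-core tuning
   table.  */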
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,  "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2", iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",           ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",           ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",            ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  false},
  {"neon-fp16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  true},
  {"vfpv4",          ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",   ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard",   ARM_FLOAT_ABI_HARD}
};
struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",        ARM_FP16_FORMAT_NONE},
  {"ieee",        ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};
struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
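/* Illustrative usage fragment (not part of the original source): callers
   pass any valid destination/source pair, e.g. copying a hard register
   into a fresh pseudo; the register number here is made up.  */
#if 0
  rtx tmp = gen_reg_rtx (SImode);
  emit_set_insn (tmp, gen_rtx_REG (SImode, 12));  /* tmp := r12 */
#endif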
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
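/* Worked example (added for clarity, not in the original source): each
   "value &= value - 1" clears exactly one set bit (Kernighan's method),
   so the loop body runs once per set bit.  For value = 0x29 (binary
   101001): 101001 -> 101000 -> 100000 -> 0, giving a count of 3.  */
#if 0
  gcc_assert (bit_count (0x29) == 3);
  gcc_assert (bit_count (0xff) == 8);
#endif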
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     functions.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
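  /* Illustrative sketch (not part of the original source): under the AEABI
     convention described above, one runtime routine serves both '/' and
     '%'.  A hypothetical C model of __aeabi_idivmod's contract:  */
#if 0
  struct idiv_result { int quot; int rem; };  /* quot in r0, rem in r1 */

  struct idiv_result
  aeabi_idivmod_model (int num, int den)
  {
    struct idiv_result r;
    r.quot = num / den;            /* plain division reads only r0 */
    r.rem = num - r.quot * den;    /* modulus reads only r1 */
    return r;
  }
#endif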
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
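/* Illustrative usage (not part of the original source): with the AAPCS
   va_list built above, ordinary varargs code simply walks the single
   __ap pointer through memory.  Hypothetical user-level code:  */
#if 0
#include <stdarg.h>

static int
sum_ints (int n, ...)
{
  va_list ap;   /* under AAPCS this is struct __va_list { void *__ap; } */
  int total = 0;

  va_start (ap, n);
  while (n-- > 0)
    total += va_arg (ap, int);   /* each read advances __ap */
  va_end (ap);
  return total;
}
#endif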
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */

static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    {
      if (streq (name, sel->name))
        return sel;
    }

  error ("bad value (%s) for %s switch", name, desc);
  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu (arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu (arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu (arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors *best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
        {
          arm_fpu_desc = &all_fpus[i];
          break;
        }
    }

  if (!arm_fpu_desc)
    error ("invalid floating point option: -mfpu=%s", target_fpu_name);
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }
  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
1731 if (TARGET_AAPCS_BASED)
1732 {
1733 if (arm_abi == ARM_ABI_IWMMXT)
1734 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1735 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1736 && TARGET_HARD_FLOAT
1737 && TARGET_VFP)
1738 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1739 else
1740 arm_pcs_default = ARM_PCS_AAPCS;
1741 }
1742 else
1743 {
1744 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1745 sorry ("-mfloat-abi=hard and VFP");
1746
1747 if (arm_abi == ARM_ABI_APCS)
1748 arm_pcs_default = ARM_PCS_APCS;
1749 else
1750 arm_pcs_default = ARM_PCS_ATPCS;
1751 }
1753 /* For arm2/3 there is no need to do any scheduling if there is only
1754 a floating point emulator, or we are doing software floating-point. */
1755 if ((TARGET_SOFT_FLOAT
1756 || (TARGET_FPA && arm_fpu_desc->rev))
1757 && (tune_flags & FL_MODE32) == 0)
1758 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1760 if (target_thread_switch)
1761 {
1762 if (strcmp (target_thread_switch, "soft") == 0)
1763 target_thread_pointer = TP_SOFT;
1764 else if (strcmp (target_thread_switch, "auto") == 0)
1765 target_thread_pointer = TP_AUTO;
1766 else if (strcmp (target_thread_switch, "cp15") == 0)
1767 target_thread_pointer = TP_CP15;
1768 else
1769 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1770 }
1772 /* Use the cp15 method if it is available. */
1773 if (target_thread_pointer == TP_AUTO)
1774 {
1775 if (arm_arch6k && !TARGET_THUMB1)
1776 target_thread_pointer = TP_CP15;
1777 else
1778 target_thread_pointer = TP_SOFT;
1779 }
1781 if (TARGET_HARD_TP && TARGET_THUMB1)
1782 error ("can not use -mtp=cp15 with 16-bit Thumb");
1784 /* Override the default structure alignment for AAPCS ABI. */
1785 if (TARGET_AAPCS_BASED)
1786 arm_structure_size_boundary = 8;
1788 if (structure_size_string != NULL)
1789 {
1790 int size = strtol (structure_size_string, NULL, 0);
1791
1792 if (size == 8 || size == 32
1793 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1794 arm_structure_size_boundary = size;
1795 else
1796 warning (0, "structure size boundary can only be set to %s",
1797 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1798 }
1800 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1801 {
1802 error ("RTP PIC is incompatible with Thumb");
1803 flag_pic = 0;
1804 }
1806 /* If stack checking is disabled, we can use r10 as the PIC register,
1807 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1808 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1809 {
1810 if (TARGET_VXWORKS_RTP)
1811 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1812 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1813 }
1815 if (flag_pic && TARGET_VXWORKS_RTP)
1816 arm_pic_register = 9;
1818 if (arm_pic_register_string != NULL)
1819 {
1820 int pic_register = decode_reg_name (arm_pic_register_string);
1821
1822 if (!flag_pic)
1823 warning (0, "-mpic-register= is useless without -fpic");
1824
1825 /* Prevent the user from choosing an obviously stupid PIC register. */
1826 else if (pic_register < 0 || call_used_regs[pic_register]
1827 || pic_register == HARD_FRAME_POINTER_REGNUM
1828 || pic_register == STACK_POINTER_REGNUM
1829 || pic_register >= PC_REGNUM
1830 || (TARGET_VXWORKS_RTP
1831 && (unsigned int) pic_register != arm_pic_register))
1832 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1833 else
1834 arm_pic_register = pic_register;
1835 }
1837 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1838 if (fix_cm3_ldrd == 2)
1839 {
1840 if (arm_selected_cpu->core == cortexm3)
1841 fix_cm3_ldrd = 1;
1842 else
1843 fix_cm3_ldrd = 0;
1844 }
1846 if (TARGET_THUMB1 && flag_schedule_insns)
1847 {
1848 /* Don't warn since it's on by default in -O2. */
1849 flag_schedule_insns = 0;
1850 }
1852 if (optimize_size)
1853 {
1854 /* If optimizing for size, bump the number of instructions that we
1855 are prepared to conditionally execute (even on a StrongARM). */
1856 max_insns_skipped = 6;
1857 }
1858 else
1859 {
1860 /* StrongARM has early execution of branches, so a sequence
1861 that is worth skipping is shorter. */
1862 if (arm_tune_strongarm)
1863 max_insns_skipped = 3;
1864 }
1866 /* Hot/Cold partitioning is not currently supported, since we can't
1867 handle literal pool placement in that case. */
1868 if (flag_reorder_blocks_and_partition)
1869 {
1870 inform (input_location,
1871 "-freorder-blocks-and-partition not supported on this architecture");
1872 flag_reorder_blocks_and_partition = 0;
1873 flag_reorder_blocks = 1;
1874 }
1876 if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST)
1877 && flag_pic)
1878 /* Hoisting PIC address calculations more aggressively provides a small,
1879 but measurable, size reduction for PIC code. Therefore, we decrease
1880 the bar for unrestricted expression hoisting to the cost of PIC address
1881 calculation, which is 2 instructions. */
1882 set_param_value ("gcse-unrestricted-cost", 2);
1884 /* Register global variables with the garbage collector. */
1885 arm_add_gc_roots ();
1886 }
1887
1888 static void
1889 arm_add_gc_roots (void)
1890 {
1891 gcc_obstack_init (&minipool_obstack);
1892 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1893 }
1895 /* A table of known ARM exception types.
1896 For use with the interrupt function attribute. */
1898 typedef struct
1899 {
1900 const char *const arg;
1901 const unsigned long return_value;
1902 }
1903 isr_attribute_arg;
1904
1905 static const isr_attribute_arg isr_attribute_args [] =
1906 {
1907 { "IRQ", ARM_FT_ISR },
1908 { "irq", ARM_FT_ISR },
1909 { "FIQ", ARM_FT_FIQ },
1910 { "fiq", ARM_FT_FIQ },
1911 { "ABORT", ARM_FT_ISR },
1912 { "abort", ARM_FT_ISR },
1913 { "ABORT", ARM_FT_ISR },
1914 { "abort", ARM_FT_ISR },
1915 { "UNDEF", ARM_FT_EXCEPTION },
1916 { "undef", ARM_FT_EXCEPTION },
1917 { "SWI", ARM_FT_EXCEPTION },
1918 { "swi", ARM_FT_EXCEPTION },
1919 { NULL, ARM_FT_NORMAL }
1920 };
1922 /* Returns the (interrupt) function type of the current
1923 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1925 static unsigned long
1926 arm_isr_value (tree argument)
1927 {
1928 const isr_attribute_arg * ptr;
1929 const char *arg;
1930
1931 if (!arm_arch_notm)
1932 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1933
1934 /* No argument - default to IRQ. */
1935 if (argument == NULL_TREE)
1936 return ARM_FT_ISR;
1938 /* Get the value of the argument. */
1939 if (TREE_VALUE (argument) == NULL_TREE
1940 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1941 return ARM_FT_UNKNOWN;
1943 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1945 /* Check it against the list of known arguments. */
1946 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1947 if (streq (arg, ptr->arg))
1948 return ptr->return_value;
1950 /* An unrecognized interrupt type. */
1951 return ARM_FT_UNKNOWN;
1954 /* Computes the type of the current function. */
1956 static unsigned long
1957 arm_compute_func_type (void)
1958 {
1959 unsigned long type = ARM_FT_UNKNOWN;
1960 tree a;
1961 tree attr;
1963 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1965 /* Decide if the current function is volatile. Such functions
1966 never return, and many memory cycles can be saved by not storing
1967 register values that will never be needed again. This optimization
1968 was added to speed up context switching in a kernel application. */
1969 if (optimize > 0
1970 && (TREE_NOTHROW (current_function_decl)
1971 || !(flag_unwind_tables
1972 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1973 && TREE_THIS_VOLATILE (current_function_decl))
1974 type |= ARM_FT_VOLATILE;
1976 if (cfun->static_chain_decl != NULL)
1977 type |= ARM_FT_NESTED;
1979 attr = DECL_ATTRIBUTES (current_function_decl);
1981 a = lookup_attribute ("naked", attr);
1982 if (a != NULL_TREE)
1983 type |= ARM_FT_NAKED;
1984
1985 a = lookup_attribute ("isr", attr);
1986 if (a == NULL_TREE)
1987 a = lookup_attribute ("interrupt", attr);
1988
1989 if (a == NULL_TREE)
1990 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1991 else
1992 type |= arm_isr_value (TREE_VALUE (a));
1993
1994 return type;
1995 }
1997 /* Returns the type of the current function. */
1999 unsigned long
2000 arm_current_func_type (void)
2001 {
2002 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2003 cfun->machine->func_type = arm_compute_func_type ();
2005 return cfun->machine->func_type;
2006 }
2007
2008 static bool
2009 arm_allocate_stack_slots_for_args (void)
2010 {
2011 /* Naked functions should not allocate stack slots for arguments. */
2012 return !IS_NAKED (arm_current_func_type ());
2013 }
2016 /* Output assembler code for a block containing the constant parts
2017 of a trampoline, leaving space for the variable parts.
2019 On the ARM, (if r8 is the static chain regnum, and remembering that
2020 referencing pc adds an offset of 8) the trampoline looks like:
2021 ldr r8, [pc, #0]
2022 ldr pc, [pc]
2023 .word static chain value
2024 .word function's address
2025 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
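/* Illustrative walk-through of the layout above (editorial, not from the
   original sources): the first ldr executes at offset 0, where reading pc
   yields 0 + 8, so [pc, #0] fetches the word at offset 8 -- the static
   chain value.  The second ldr executes at offset 4, reads pc as 12, and
   so fetches the word at offset 12 -- the function's address. */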
2027 static void
2028 arm_asm_trampoline_template (FILE *f)
2029 {
2030 if (TARGET_ARM)
2031 {
2032 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2033 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2034 }
2035 else if (TARGET_THUMB2)
2036 {
2037 /* The Thumb-2 trampoline is similar to the arm implementation.
2038 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2039 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2040 STATIC_CHAIN_REGNUM, PC_REGNUM);
2041 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2042 }
2043 else
2044 {
2045 ASM_OUTPUT_ALIGN (f, 2);
2046 fprintf (f, "\t.code\t16\n");
2047 fprintf (f, ".Ltrampoline_start:\n");
2048 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2049 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2050 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2051 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2052 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2053 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2054 }
2055 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2056 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2057 }
2059 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2061 static void
2062 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2063 {
2064 rtx fnaddr, mem, a_tramp;
2066 emit_block_move (m_tramp, assemble_trampoline_template (),
2067 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2069 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2070 emit_move_insn (mem, chain_value);
2072 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2073 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2074 emit_move_insn (mem, fnaddr);
2076 a_tramp = XEXP (m_tramp, 0);
2077 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2078 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2079 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2080 }
2082 /* Thumb trampolines should be entered in thumb mode, so set
2083 the bottom bit of the address. */
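/* E.g. (illustrative, not from the original sources): a Thumb trampoline
   placed at 0x8000 is entered through the address 0x8001; the set low bit
   makes the BX/BLX that transfers control switch the core into Thumb
   state. */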
2085 static rtx
2086 arm_trampoline_adjust_address (rtx addr)
2087 {
2088 if (TARGET_THUMB)
2089 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2090 NULL, 0, OPTAB_LIB_WIDEN);
2091 return addr;
2092 }
2094 /* Return 1 if it is possible to return using a single instruction.
2095 If SIBLING is non-null, this is a test for a return before a sibling
2096 call. SIBLING is the call insn, so we can examine its register usage. */
2098 int
2099 use_return_insn (int iscond, rtx sibling)
2100 {
2101 int regno;
2102 unsigned int func_type;
2103 unsigned long saved_int_regs;
2104 unsigned HOST_WIDE_INT stack_adjust;
2105 arm_stack_offsets *offsets;
2107 /* Never use a return instruction before reload has run. */
2108 if (!reload_completed)
2109 return 0;
2111 func_type = arm_current_func_type ();
2113 /* Naked, volatile and stack alignment functions need special
2114 consideration. */
2115 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2116 return 0;
2118 /* So do interrupt functions that use the frame pointer and Thumb
2119 interrupt functions. */
2120 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2121 return 0;
2123 offsets = arm_get_frame_offsets ();
2124 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2126 /* As do variadic functions. */
2127 if (crtl->args.pretend_args_size
2128 || cfun->machine->uses_anonymous_args
2129 /* Or if the function calls __builtin_eh_return () */
2130 || crtl->calls_eh_return
2131 /* Or if the function calls alloca */
2132 || cfun->calls_alloca
2133 /* Or if there is a stack adjustment. However, if the stack pointer
2134 is saved on the stack, we can use a pre-incrementing stack load. */
2135 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2136 && stack_adjust == 4)))
2137 return 0;
2139 saved_int_regs = offsets->saved_regs_mask;
2141 /* Unfortunately, the insn
2143 ldmib sp, {..., sp, ...}
2145 triggers a bug on most SA-110 based devices, such that the stack
2146 pointer won't be correctly restored if the instruction takes a
2147 page fault. We work around this problem by popping r3 along with
2148 the other registers, since that is never slower than executing
2149 another instruction.
2151 We test for !arm_arch5 here, because code for any architecture
2152 less than this could potentially be run on one of the buggy
2153 chips. */
2154 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2155 {
2156 /* Validate that r3 is a call-clobbered register (always true in
2157 the default abi) ... */
2158 if (!call_used_regs[3])
2159 return 0;
2160
2161 /* ... that it isn't being used for a return value ... */
2162 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2163 return 0;
2164
2165 /* ... or for a tail-call argument ... */
2166 if (sibling)
2167 {
2168 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2169
2170 if (find_regno_fusage (sibling, USE, 3))
2171 return 0;
2172 }
2174 /* ... and that there are no call-saved registers in r0-r2
2175 (always true in the default ABI). */
2176 if (saved_int_regs & 0x7)
2177 return 0;
2178 }
2180 /* Can't be done if interworking with Thumb, and any registers have been
2181 stacked. */
2182 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2183 return 0;
2185 /* On StrongARM, conditional returns are expensive if they aren't
2186 taken and multiple registers have been stacked. */
2187 if (iscond && arm_tune_strongarm)
2188 {
2189 /* Conditional return when just the LR is stored is a simple
2190 conditional-load instruction, that's not expensive. */
2191 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2192 return 0;
2193
2194 if (flag_pic
2195 && arm_pic_register != INVALID_REGNUM
2196 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2197 return 0;
2198 }
2200 /* If there are saved registers but the LR isn't saved, then we need
2201 two instructions for the return. */
2202 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2203 return 0;
2205 /* Can't be done if any of the FPA regs are pushed,
2206 since this also requires an insn. */
2207 if (TARGET_HARD_FLOAT && TARGET_FPA)
2208 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2209 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2210 return 0;
2212 /* Likewise VFP regs. */
2213 if (TARGET_HARD_FLOAT && TARGET_VFP)
2214 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2215 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2216 return 0;
2218 if (TARGET_REALLY_IWMMXT)
2219 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2220 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2221 return 0;
2222
2223 return 1;
2224 }
2226 /* Return TRUE if int I is a valid immediate ARM constant. */
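/* Such an immediate is an 8-bit value rotated right by an even number of
   bit positions.  Illustrative examples (editorial, not from the original
   sources): 0x000000ff, 0x00000ff0 and 0xff000000 are all encodable,
   while 0x00000101 (nine significant bits) and 0x000001fe (which would
   need an odd rotation) are not. */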
2228 int
2229 const_ok_for_arm (HOST_WIDE_INT i)
2230 {
2231 int lowbit;
2233 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2234 be all zero, or all one. */
2235 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2236 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2237 != ((~(unsigned HOST_WIDE_INT) 0)
2238 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2239 return 0;
2240
2241 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2243 /* Fast return for 0 and small values. We must do this for zero, since
2244 the code below can't handle that one case. */
2245 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2246 return 1;
2248 /* Get the number of trailing zeros. */
2249 lowbit = ffs((int) i) - 1;
2251 /* Only even shifts are allowed in ARM mode so round down to the
2252 nearest even number. */
2253 if (TARGET_ARM)
2254 lowbit &= ~1;
2255
2256 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2257 return 1;
2261 /* Allow rotated constants in ARM mode. */
2262 if (TARGET_ARM && lowbit <= 4
2263 && ((i & ~0xc000003f) == 0
2264 || (i & ~0xf000000f) == 0
2265 || (i & ~0xfc000003) == 0))
2266 return 1;
2267
2268 if (TARGET_THUMB2)
2269 {
2270 HOST_WIDE_INT v;
2272 /* Allow repeated pattern. */
2273 v = i & 0xff;
2274 v |= v << 16;
2275 if (i == v || i == (v | (v << 8)))
2276 return 1;
2277 }
2278
2279 return 0;
2280 }
2282 /* Return true if I is a valid constant for the operation CODE. */
2283 static int
2284 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2285 {
2286 if (const_ok_for_arm (i))
2310 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2312 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2318 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2322 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2329 /* Emit a sequence of insns to handle a large constant.
2330 CODE is the code of the operation required, it can be any of SET, PLUS,
2331 IOR, AND, XOR, MINUS;
2332 MODE is the mode in which the operation is being performed;
2333 VAL is the integer to operate on;
2334 SOURCE is the other operand (a register, or a null-pointer for SET);
2335 SUBTARGETS means it is safe to create scratch registers if that will
2336 either produce a simpler sequence, or we will want to cse the values.
2337 Return value is the number of insns emitted. */
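/* Illustrative example (editorial, not from the original sources): a SET
   of 0x00ff00ff has no single-insn encoding, but can be synthesized as

	mov	rD, #0x00ff0000
	orr	rD, rD, #0x000000ff

   i.e. two insns, each using one 8-bit rotated immediate. */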
2339 /* ??? Tweak this for thumb2. */
2340 int
2341 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2342 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2343 {
2344 rtx cond;
2345
2346 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2347 cond = COND_EXEC_TEST (PATTERN (insn));
2348 else
2349 cond = NULL_RTX;
2351 if (subtargets || code == SET
2352 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2353 && REGNO (target) != REGNO (source)))
2355 /* After arm_reorg has been called, we can't fix up expensive
2356 constants by pushing them into memory so we must synthesize
2357 them in-line, regardless of the cost. This is only likely to
2358 be more costly on chips that have load delay slots and we are
2359 compiling without running the scheduler (so no splitting
2360 occurred before the final instruction emission).
2362 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2364 if (!after_arm_reorg
2365 && !cond
2366 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2367 1, 0)
2368 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2369 + (code != SET))))
2370 {
2371 if (code == SET)
2372 {
2373 /* Currently SET is the only monadic value for CODE, all
2374 the rest are dyadic. */
2375 if (TARGET_USE_MOVT)
2376 arm_emit_movpair (target, GEN_INT (val));
2377 else
2378 emit_set_insn (target, GEN_INT (val));
2379 return 1;
2380 }
2381 else
2382 {
2384 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2386 if (TARGET_USE_MOVT)
2387 arm_emit_movpair (temp, GEN_INT (val));
2388 else
2389 emit_set_insn (temp, GEN_INT (val));
2391 /* For MINUS, the value is subtracted from, since we never
2392 have subtraction of a constant. */
2393 if (code == MINUS)
2394 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2395 else
2396 emit_set_insn (target,
2397 gen_rtx_fmt_ee (code, mode, source, temp));
2398 return 2;
2399 }
2400 }
2401
2403 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2404 1);
2405 }
2407 /* Return the number of instructions required to synthesize the given
2408 constant, if we start emitting them from bit-position I. */
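/* Illustrative example (editorial, not from the original sources): the
   constant 0x00ffff00 is covered by two 8-bit chunks, 0x00ff0000 and
   0x0000ff00, so a suitable starting position yields a count of two
   instructions. */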
2409 static int
2410 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2411 {
2412 HOST_WIDE_INT temp1;
2413 int step_size = TARGET_ARM ? 2 : 1;
2416 gcc_assert (TARGET_ARM || i == 0);
2424 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2429 temp1 = remainder & ((0x0ff << end)
2430 | ((i < end) ? (0xff >> (32 - end)) : 0));
2431 remainder &= ~temp1;
2436 } while (remainder);
2440 static int
2441 find_best_start (unsigned HOST_WIDE_INT remainder)
2442 {
2443 int best_consecutive_zeros = 0;
2444 int i;
2445 int best_start = 0;
2447 /* If we aren't targeting ARM, the best place to start is always at
2448 the bottom. */
2449 if (!TARGET_ARM)
2450 return 0;
2452 for (i = 0; i < 32; i += 2)
2453 {
2454 int consecutive_zeros = 0;
2455
2456 if (!(remainder & (3 << i)))
2457 {
2458 while ((i < 32) && !(remainder & (3 << i)))
2459 {
2460 consecutive_zeros += 2;
2461 i += 2;
2462 }
2463 if (consecutive_zeros > best_consecutive_zeros)
2464 {
2465 best_consecutive_zeros = consecutive_zeros;
2466 best_start = i - consecutive_zeros;
2467 }
2468 i -= 2;
2469 }
2470 }
2472 /* So long as it won't require any more insns to do so, it's
2473 desirable to emit a small constant (in bits 0...9) in the last
2474 insn. This way there is more chance that it can be combined with
2475 a later addressing insn to form a pre-indexed load or store
2476 operation. Consider:
2478 *((volatile int *)0xe0000100) = 1;
2479 *((volatile int *)0xe0000110) = 2;
2481 We want this to wind up as:
2485 str rB, [rA, #0x100]
2487 str rB, [rA, #0x110]
2489 rather than having to synthesize both large constants from scratch.
2491 Therefore, we calculate how many insns would be required to emit
2492 the constant starting from `best_start', and also starting from
2493 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2494 yield a shorter sequence, we may as well use zero. */
2495 if (best_start != 0
2496 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2497 && (count_insns_for_constant (remainder, 0) <=
2498 count_insns_for_constant (remainder, best_start)))
2499 best_start = 0;
2500
2501 return best_start;
2502 }
2504 /* Emit an instruction with the indicated PATTERN. If COND is
2505 non-NULL, conditionalize the execution of the instruction on COND
2506 being true. */
2507
2508 static void
2509 emit_constant_insn (rtx cond, rtx pattern)
2510 {
2511 if (cond)
2512 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2513 emit_insn (pattern);
2514 }
2516 /* As above, but extra parameter GENERATE which, if clear, suppresses
2517 RTL generation. */
2518 /* ??? This needs more work for thumb2. */
2520 static int
2521 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2522 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2523 int generate)
2524 {
2525 int can_invert = 0;
2526 int can_negate = 0;
2527 int final_invert = 0;
2528 int can_negate_initial = 0;
2530 int num_bits_set = 0;
2531 int set_sign_bit_copies = 0;
2532 int clear_sign_bit_copies = 0;
2533 int clear_zero_bit_copies = 0;
2534 int set_zero_bit_copies = 0;
2536 unsigned HOST_WIDE_INT temp1, temp2;
2537 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2538 int step_size = TARGET_ARM ? 2 : 1;
2540 /* Find out which operations are safe for a given CODE. Also do a quick
2541 check for degenerate cases; these can occur when DImode operations
2552 can_negate_initial = 1;
2556 if (remainder == 0xffffffff)
2559 emit_constant_insn (cond,
2560 gen_rtx_SET (VOIDmode, target,
2561 GEN_INT (ARM_SIGN_EXTEND (val))));
2567 if (reload_completed && rtx_equal_p (target, source))
2571 emit_constant_insn (cond,
2572 gen_rtx_SET (VOIDmode, target, source));
2584 emit_constant_insn (cond,
2585 gen_rtx_SET (VOIDmode, target, const0_rtx));
2588 if (remainder == 0xffffffff)
2590 if (reload_completed && rtx_equal_p (target, source))
2593 emit_constant_insn (cond,
2594 gen_rtx_SET (VOIDmode, target, source));
2603 if (reload_completed && rtx_equal_p (target, source))
2606 emit_constant_insn (cond,
2607 gen_rtx_SET (VOIDmode, target, source));
2611 if (remainder == 0xffffffff)
2614 emit_constant_insn (cond,
2615 gen_rtx_SET (VOIDmode, target,
2616 gen_rtx_NOT (mode, source)));
2622 /* We treat MINUS as (val - source), since (source - val) is always
2623 passed as (source + (-val)). */
2627 emit_constant_insn (cond,
2628 gen_rtx_SET (VOIDmode, target,
2629 gen_rtx_NEG (mode, source)));
2632 if (const_ok_for_arm (val))
2635 emit_constant_insn (cond,
2636 gen_rtx_SET (VOIDmode, target,
2637 gen_rtx_MINUS (mode, GEN_INT (val),
2649 /* If we can do it in one insn get out quickly. */
2650 if (const_ok_for_arm (val)
2651 || (can_negate_initial && const_ok_for_arm (-val))
2652 || (can_invert && const_ok_for_arm (~val)))
2655 emit_constant_insn (cond,
2656 gen_rtx_SET (VOIDmode, target,
2658 ? gen_rtx_fmt_ee (code, mode, source,
2664 /* Calculate a few attributes that may be useful for specific
2666 /* Count number of leading zeros. */
2667 for (i = 31; i >= 0; i--)
2669 if ((remainder & (1 << i)) == 0)
2670 clear_sign_bit_copies++;
2675 /* Count number of leading 1's. */
2676 for (i = 31; i >= 0; i--)
2678 if ((remainder & (1 << i)) != 0)
2679 set_sign_bit_copies++;
2684 /* Count number of trailing zeros. */
2685 for (i = 0; i <= 31; i++)
2687 if ((remainder & (1 << i)) == 0)
2688 clear_zero_bit_copies++;
2693 /* Count number of trailing 1's. */
2694 for (i = 0; i <= 31; i++)
2696 if ((remainder & (1 << i)) != 0)
2697 set_zero_bit_copies++;
2705 /* See if we can use movw. */
2706 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2709 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2714 /* See if we can do this by sign_extending a constant that is known
2715 to be negative. This is a good way of doing it, since the shift
2716 may well merge into a subsequent insn. */
2717 if (set_sign_bit_copies > 1)
2719 if (const_ok_for_arm
2720 (temp1 = ARM_SIGN_EXTEND (remainder
2721 << (set_sign_bit_copies - 1))))
2725 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2726 emit_constant_insn (cond,
2727 gen_rtx_SET (VOIDmode, new_src,
2729 emit_constant_insn (cond,
2730 gen_ashrsi3 (target, new_src,
2731 GEN_INT (set_sign_bit_copies - 1)));
2735 /* For an inverted constant, we will need to set the low bits,
2736 these will be shifted out of harm's way. */
2737 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2738 if (const_ok_for_arm (~temp1))
2742 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2743 emit_constant_insn (cond,
2744 gen_rtx_SET (VOIDmode, new_src,
2746 emit_constant_insn (cond,
2747 gen_ashrsi3 (target, new_src,
2748 GEN_INT (set_sign_bit_copies - 1)));
2754 /* See if we can calculate the value as the difference between two
2755 valid immediates. */
2756 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2758 int topshift = clear_sign_bit_copies & ~1;
2760 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2761 & (0xff000000 >> topshift));
2763 /* If temp1 is zero, then that means the 9 most significant
2764 bits of remainder were 1 and we've caused it to overflow.
2765 When topshift is 0 we don't need to do anything since we
2766 can borrow from 'bit 32'. */
2767 if (temp1 == 0 && topshift != 0)
2768 temp1 = 0x80000000 >> (topshift - 1);
2770 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2772 if (const_ok_for_arm (temp2))
2776 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2777 emit_constant_insn (cond,
2778 gen_rtx_SET (VOIDmode, new_src,
2780 emit_constant_insn (cond,
2781 gen_addsi3 (target, new_src,
2789 /* See if we can generate this by setting the bottom (or the top)
2790 16 bits, and then shifting these into the other half of the
2791 word. We only look for the simplest cases, to do more would cost
2792 too much. Be careful, however, not to generate this when the
2793 alternative would take fewer insns. */
2794 if (val & 0xffff0000)
2796 temp1 = remainder & 0xffff0000;
2797 temp2 = remainder & 0x0000ffff;
2799 /* Overlaps outside this range are best done using other methods. */
2800 for (i = 9; i < 24; i++)
2802 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2803 && !const_ok_for_arm (temp2))
2805 rtx new_src = (subtargets
2806 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2808 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2809 source, subtargets, generate);
2817 gen_rtx_ASHIFT (mode, source,
2824 /* Don't duplicate cases already considered. */
2825 for (i = 17; i < 24; i++)
2827 if (((temp1 | (temp1 >> i)) == remainder)
2828 && !const_ok_for_arm (temp1))
2830 rtx new_src = (subtargets
2831 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2833 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2834 source, subtargets, generate);
2839 gen_rtx_SET (VOIDmode, target,
2842 gen_rtx_LSHIFTRT (mode, source,
2853 /* If we have IOR or XOR, and the constant can be loaded in a
2854 single instruction, and we can find a temporary to put it in,
2855 then this can be done in two instructions instead of 3-4. */
2857 /* TARGET can't be NULL if SUBTARGETS is 0 */
2858 || (reload_completed && !reg_mentioned_p (target, source)))
2860 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2864 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2866 emit_constant_insn (cond,
2867 gen_rtx_SET (VOIDmode, sub,
2869 emit_constant_insn (cond,
2870 gen_rtx_SET (VOIDmode, target,
2871 gen_rtx_fmt_ee (code, mode,
2882 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2883 and the remainder 0s for e.g. 0xfff00000)
2884 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2886 This can be done in 2 instructions by using shifts with mov or mvn.
2891 mvn r0, r0, lsr #12 */
2892 if (set_sign_bit_copies > 8
2893 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2897 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2898 rtx shift = GEN_INT (set_sign_bit_copies);
2902 gen_rtx_SET (VOIDmode, sub,
2904 gen_rtx_ASHIFT (mode,
2909 gen_rtx_SET (VOIDmode, target,
2911 gen_rtx_LSHIFTRT (mode, sub,
2918 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2920 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2922 For eg. r0 = r0 | 0xfff
2927 if (set_zero_bit_copies > 8
2928 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2932 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2933 rtx shift = GEN_INT (set_zero_bit_copies);
2937 gen_rtx_SET (VOIDmode, sub,
2939 gen_rtx_LSHIFTRT (mode,
2944 gen_rtx_SET (VOIDmode, target,
2946 gen_rtx_ASHIFT (mode, sub,
2952 /* This will never be reached for Thumb2 because orn is a valid
2953 instruction. This is for Thumb1 and the ARM 32 bit cases.
2955 x = y | constant (such that ~constant is a valid constant)
2957 x = ~(~y & ~constant).
2959 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2963 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2964 emit_constant_insn (cond,
2965 gen_rtx_SET (VOIDmode, sub,
2966 gen_rtx_NOT (mode, source)));
2969 sub = gen_reg_rtx (mode);
2970 emit_constant_insn (cond,
2971 gen_rtx_SET (VOIDmode, sub,
2972 gen_rtx_AND (mode, source,
2974 emit_constant_insn (cond,
2975 gen_rtx_SET (VOIDmode, target,
2976 gen_rtx_NOT (mode, sub)));
2983 /* See if two shifts will do 2 or more insn's worth of work. */
2984 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2986 HOST_WIDE_INT shift_mask = ((0xffffffff
2987 << (32 - clear_sign_bit_copies))
2990 if ((remainder | shift_mask) != 0xffffffff)
2994 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2995 insns = arm_gen_constant (AND, mode, cond,
2996 remainder | shift_mask,
2997 new_src, source, subtargets, 1);
3002 rtx targ = subtargets ? NULL_RTX : target;
3003 insns = arm_gen_constant (AND, mode, cond,
3004 remainder | shift_mask,
3005 targ, source, subtargets, 0);
3011 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3012 rtx shift = GEN_INT (clear_sign_bit_copies);
3014 emit_insn (gen_ashlsi3 (new_src, source, shift));
3015 emit_insn (gen_lshrsi3 (target, new_src, shift));
3021 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3023 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3025 if ((remainder | shift_mask) != 0xffffffff)
3029 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3031 insns = arm_gen_constant (AND, mode, cond,
3032 remainder | shift_mask,
3033 new_src, source, subtargets, 1);
3038 rtx targ = subtargets ? NULL_RTX : target;
3040 insns = arm_gen_constant (AND, mode, cond,
3041 remainder | shift_mask,
3042 targ, source, subtargets, 0);
3048 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3049 rtx shift = GEN_INT (clear_zero_bit_copies);
3051 emit_insn (gen_lshrsi3 (new_src, source, shift));
3052 emit_insn (gen_ashlsi3 (target, new_src, shift));
3064 for (i = 0; i < 32; i++)
3065 if (remainder & (1 << i))
3069 || (code != IOR && can_invert && num_bits_set > 16))
3070 remainder ^= 0xffffffff;
3071 else if (code == PLUS && num_bits_set > 16)
3072 remainder = (-remainder) & 0xffffffff;
3074 /* For XOR, if more than half the bits are set and there's a sequence
3075 of more than 8 consecutive ones in the pattern then we can XOR by the
3076 inverted constant and then invert the final result; this may save an
3077 instruction and might also lead to the final mvn being merged with
3078 some other operation. */
3079 else if (code == XOR && num_bits_set > 16
3080 && (count_insns_for_constant (remainder ^ 0xffffffff,
3082 (remainder ^ 0xffffffff))
3083 < count_insns_for_constant (remainder,
3084 find_best_start (remainder))))
3086 remainder ^= 0xffffffff;
3095 /* Now try and find a way of doing the job in either two or three
3097 We start by looking for the largest block of zeros that are aligned on
3098 a 2-bit boundary, we then fill up the temps, wrapping around to the
3099 top of the word when we drop off the bottom.
3100 In the worst case this code should produce no more than four insns.
3101 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3102 best place to start. */
3104 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3107 /* Now start emitting the insns. */
3108 i = find_best_start (remainder);
3115 if (remainder & (3 << (i - 2)))
3120 temp1 = remainder & ((0x0ff << end)
3121 | ((i < end) ? (0xff >> (32 - end)) : 0));
3122 remainder &= ~temp1;
3126 rtx new_src, temp1_rtx;
3128 if (code == SET || code == MINUS)
3130 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3131 if (can_invert && code != MINUS)
3136 if ((final_invert || remainder) && subtargets)
3137 new_src = gen_reg_rtx (mode);
3142 else if (can_negate)
3146 temp1 = trunc_int_for_mode (temp1, mode);
3147 temp1_rtx = GEN_INT (temp1);
3151 else if (code == MINUS)
3152 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3154 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3156 emit_constant_insn (cond,
3157 gen_rtx_SET (VOIDmode, new_src,
3167 else if (code == MINUS)
3173 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3183 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3184 gen_rtx_NOT (mode, source)));
3191 /* Canonicalize a comparison so that we are more likely to recognize it.
3192 This can be done for a few constant compares, where we can make the
3193 immediate value easier to load. */
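/* For example (illustrative, not from the original sources):
   (GT reg, #0xffff) uses a constant with no valid immediate encoding,
   but the equivalent (GE reg, #0x10000) uses 0x10000 = 0x1 rotated by
   16 bits, which is encodable; so GT against OP1 is rewritten as GE
   against OP1 + 1. */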
3195 enum rtx_code
3196 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3197 {
3198 enum machine_mode mode;
3199 unsigned HOST_WIDE_INT i, maxval;
3201 mode = GET_MODE (*op0);
3202 if (mode == VOIDmode)
3203 mode = GET_MODE (*op1);
3205 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3207 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3208 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3209 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3210 for GTU/LEU in Thumb mode. */
3215 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3217 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3220 if (code == GT || code == LE
3221 || (!TARGET_ARM && (code == GTU || code == LEU)))
3223 /* Missing comparison. First try to use an available
3225 if (GET_CODE (*op1) == CONST_INT)
3233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3235 *op1 = GEN_INT (i + 1);
3236 return code == GT ? GE : LT;
3241 if (i != ~((unsigned HOST_WIDE_INT) 0)
3242 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3244 *op1 = GEN_INT (i + 1);
3245 return code == GTU ? GEU : LTU;
3253 /* If that did not work, reverse the condition. */
3257 return swap_condition (code);
3263 /* Comparisons smaller than DImode. Only adjust comparisons against
3264 an out-of-range constant. */
3265 if (GET_CODE (*op1) != CONST_INT
3266 || const_ok_for_arm (INTVAL (*op1))
3267 || const_ok_for_arm (- INTVAL (*op1)))
3281 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3283 *op1 = GEN_INT (i + 1);
3284 return code == GT ? GE : LT;
3291 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3293 *op1 = GEN_INT (i - 1);
3294 return code == GE ? GT : LE;
3300 if (i != ~((unsigned HOST_WIDE_INT) 0)
3301 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3303 *op1 = GEN_INT (i + 1);
3304 return code == GTU ? GEU : LTU;
3311 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3313 *op1 = GEN_INT (i - 1);
3314 return code == GEU ? GTU : LEU;
3326 /* Define how to find the value returned by a function. */
3328 rtx
3329 arm_function_value(const_tree type, const_tree func,
3330 bool outgoing ATTRIBUTE_UNUSED)
3331 {
3332 enum machine_mode mode;
3333 int unsignedp ATTRIBUTE_UNUSED;
3334 rtx r ATTRIBUTE_UNUSED;
3336 mode = TYPE_MODE (type);
3338 if (TARGET_AAPCS_BASED)
3339 return aapcs_allocate_return_reg (mode, type, func);
3341 /* Promote integer types. */
3342 if (INTEGRAL_TYPE_P (type))
3343 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3345 /* Promotes small structs returned in a register to full-word size
3346 for big-endian AAPCS. */
3347 if (arm_return_in_msb (type))
3349 HOST_WIDE_INT size = int_size_in_bytes (type);
3350 if (size % UNITS_PER_WORD != 0)
3351 {
3352 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3353 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3354 }
3355 }
3356
3357 return LIBCALL_VALUE (mode);
3358 }
3360 static int
3361 libcall_eq (const void *p1, const void *p2)
3362 {
3363 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3364 }
3366 static hashval_t
3367 libcall_hash (const void *p1)
3368 {
3369 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3370 }
3372 static void
3373 add_libcall (htab_t htab, rtx libcall)
3374 {
3375 *htab_find_slot (htab, libcall, INSERT) = libcall;
3376 }
3378 static bool
3379 arm_libcall_uses_aapcs_base (const_rtx libcall)
3380 {
3381 static bool init_done = false;
3382 static htab_t libcall_htab;
3384 if (!init_done)
3385 {
3386 init_done = true;
3387
3388 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3389 NULL);
3390 add_libcall (libcall_htab,
3391 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3392 add_libcall (libcall_htab,
3393 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3394 add_libcall (libcall_htab,
3395 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3396 add_libcall (libcall_htab,
3397 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3399 add_libcall (libcall_htab,
3400 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3401 add_libcall (libcall_htab,
3402 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3403 add_libcall (libcall_htab,
3404 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3405 add_libcall (libcall_htab,
3406 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3408 add_libcall (libcall_htab,
3409 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3410 add_libcall (libcall_htab,
3411 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3412 add_libcall (libcall_htab,
3413 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3414 add_libcall (libcall_htab,
3415 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3416 add_libcall (libcall_htab,
3417 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3418 add_libcall (libcall_htab,
3419 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3420 }
3421
3422 return libcall && htab_find (libcall_htab, libcall) != NULL;
3423 }
3425 rtx
3426 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3427 {
3428 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3429 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3430 {
3431 /* The following libcalls return their result in integer registers,
3432 even though they return a floating point value. */
3433 if (arm_libcall_uses_aapcs_base (libcall))
3434 return gen_rtx_REG (mode, ARG_REGISTER(1));
3435 }
3436
3438 return LIBCALL_VALUE (mode);
3439 }
3441 /* Determine the amount of memory needed to store the possible return
3442 registers of an untyped call. */
3443 int
3444 arm_apply_result_size (void)
3445 {
3446 int size = 16;
3447
3448 if (TARGET_32BIT)
3449 {
3450 if (TARGET_HARD_FLOAT_ABI)
3451 {
3452 if (TARGET_VFP)
3453 size += 32;
3454 if (TARGET_FPA)
3455 size += 12;
3456 if (TARGET_MAVERICK)
3457 size += 8;
3458 }
3459 if (TARGET_IWMMXT_ABI)
3460 size += 63;
3461 }
3462
3463 return size;
3464 }
3466 /* Decide whether TYPE should be returned in memory (true)
3467 or in a register (false). FNTYPE is the type of the function making
3468 the call. */
3469 static bool
3470 arm_return_in_memory (const_tree type, const_tree fntype)
3471 {
3472 HOST_WIDE_INT size;
3473
3474 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3476 if (TARGET_AAPCS_BASED)
3477 {
3478 /* Simple, non-aggregate types (ie not including vectors and
3479 complex) are always returned in a register (or registers).
3480 We don't care about which register here, so we can short-cut
3481 some of the detail. */
3482 if (!AGGREGATE_TYPE_P (type)
3483 && TREE_CODE (type) != VECTOR_TYPE
3484 && TREE_CODE (type) != COMPLEX_TYPE)
3485 return false;
3486
3487 /* Any return value that is no larger than one word can be
3488 returned in r0. */
3489 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3490 return false;
3492 /* Check any available co-processors to see if they accept the
3493 type as a register candidate (VFP, for example, can return
3494 some aggregates in consecutive registers). These aren't
3495 available if the call is variadic. */
3496 if (aapcs_select_return_coproc (type, fntype) >= 0)
3497 return false;
3499 /* Vector values should be returned using ARM registers, not
3500 memory (unless they're over 16 bytes, which will break since
3501 we only have four call-clobbered registers to play with). */
3502 if (TREE_CODE (type) == VECTOR_TYPE)
3503 return (size < 0 || size > (4 * UNITS_PER_WORD));
3505 /* The rest go in memory. */
3506 return true;
3507 }
3509 if (TREE_CODE (type) == VECTOR_TYPE)
3510 return (size < 0 || size > (4 * UNITS_PER_WORD));
3512 if (!AGGREGATE_TYPE_P (type) &&
3513 (TREE_CODE (type) != VECTOR_TYPE))
3514 /* All simple types are returned in registers. */
3515 return false;
3517 if (arm_abi != ARM_ABI_APCS)
3518 {
3519 /* ATPCS and later return aggregate types in memory only if they are
3520 larger than a word (or are variable size). */
3521 return (size < 0 || size > UNITS_PER_WORD);
3522 }
3524 /* For the arm-wince targets we choose to be compatible with Microsoft's
3525 ARM and Thumb compilers, which always return aggregates in memory. */
3526 #ifndef ARM_WINCE
3527 /* All structures/unions bigger than one word are returned in memory.
3528 Also catch the case where int_size_in_bytes returns -1. In this case
3529 the aggregate is either huge or of variable size, and in either case
3530 we will want to return it via memory and not in a register. */
3531 if (size < 0 || size > UNITS_PER_WORD)
3532 return true;
3534 if (TREE_CODE (type) == RECORD_TYPE)
3535 {
3536 tree field;
3538 /* For a struct the APCS says that we only return in a register
3539 if the type is 'integer like' and every addressable element
3540 has an offset of zero. For practical purposes this means
3541 that the structure can have at most one non bit-field element
3542 and that this element must be the first one in the structure. */
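      /* Illustrative examples of these rules (editorial, not from the
	 original sources): struct { int i; } is 'integer like' and is
	 returned in a register, while struct { float f; } fails the
	 float check below and must be returned in memory. */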
3544 /* Find the first field, ignoring non FIELD_DECL things which will
3545 have been created by C++. */
3546 for (field = TYPE_FIELDS (type);
3547 field && TREE_CODE (field) != FIELD_DECL;
3548 field = DECL_CHAIN (field))
3549 continue;
3550
3551 if (field == NULL)
3552 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3554 /* Check that the first field is valid for returning in a register. */
3556 /* ... Floats are not allowed */
3557 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3558 return true;
3560 /* ... Aggregates that are not themselves valid for returning in
3561 a register are not allowed. */
3562 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3563 return true;
3565 /* Now check the remaining fields, if any. Only bitfields are allowed,
3566 since they are not addressable. */
3567 for (field = DECL_CHAIN (field);
3568 field;
3569 field = DECL_CHAIN (field))
3570 {
3571 if (TREE_CODE (field) != FIELD_DECL)
3572 continue;
3573
3574 if (!DECL_BIT_FIELD_TYPE (field))
3575 return true;
3576 }
3577
3578 return false;
3579 }
3581 if (TREE_CODE (type) == UNION_TYPE)
3582 {
3583 tree field;
3584
3585 /* Unions can be returned in registers if every element is
3586 integral, or can be returned in an integer register. */
3587 for (field = TYPE_FIELDS (type);
3588 field;
3589 field = DECL_CHAIN (field))
3590 {
3591 if (TREE_CODE (field) != FIELD_DECL)
3592 continue;
3593
3594 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3595 return true;
3596
3597 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3598 return true;
3599 }
3600
3601 return false;
3602 }
3603 #endif /* not ARM_WINCE */
3605 /* Return all other types in memory. */
3606 return true;
3607 }
3609 /* Indicate whether or not words of a double are in big-endian order. */
3611 int
3612 arm_float_words_big_endian (void)
3613 {
3614 if (TARGET_MAVERICK)
3615 return 0;
3616
3617 /* For FPA, float words are always big-endian. For VFP, floats words
3618 follow the memory system mode. */
3619 if (TARGET_FPA)
3620 return 1;
3625 if (TARGET_VFP)
3626 return (TARGET_BIG_END ? 1 : 0);
3628 return 1;
3629 }
3631 const struct pcs_attribute_arg
3632 {
3633 const char *arg;
3634 enum arm_pcs value;
3635 } pcs_attribute_args[] =
3636 {
3637 {"aapcs", ARM_PCS_AAPCS},
3638 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3639 #if 0
3640 /* We could recognize these, but changes would be needed elsewhere
3641 * to implement them. */
3642 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3643 {"atpcs", ARM_PCS_ATPCS},
3644 {"apcs", ARM_PCS_APCS},
3645 #endif
3646 {NULL, ARM_PCS_UNKNOWN}
3647 };
3648
3649 static enum arm_pcs
3650 arm_pcs_from_attribute (tree attr)
3651 {
3652 const struct pcs_attribute_arg *ptr;
3653 const char *arg;
3655 /* Get the value of the argument. */
3656 if (TREE_VALUE (attr) == NULL_TREE
3657 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3658 return ARM_PCS_UNKNOWN;
3660 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3662 /* Check it against the list of known arguments. */
3663 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3664 if (streq (arg, ptr->arg))
3665 return ptr->value;
3666
3667 /* An unrecognized PCS name. */
3668 return ARM_PCS_UNKNOWN;
3669 }
3671 /* Get the PCS variant to use for this call. TYPE is the function's type
3672 specification, DECL is the specific declaration. DECL may be null if
3673 the call could be indirect or if this is a library call. */
3674 static enum arm_pcs
3675 arm_get_pcs_model (const_tree type, const_tree decl)
3676 {
3677 bool user_convention = false;
3678 enum arm_pcs user_pcs = arm_pcs_default;
3679 tree attr;
3680
3683 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3684 if (attr)
3685 {
3686 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3687 user_convention = true;
3688 }
3690 if (TARGET_AAPCS_BASED)
3691 {
3692 /* Detect varargs functions. These always use the base rules
3693 (no argument is ever a candidate for a co-processor
3694 register). */
3695 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3696 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3697 != void_type_node));
3699 if (user_convention)
3700 {
3701 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3702 sorry ("Non-AAPCS derived PCS variant");
3703 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3704 error ("Variadic functions must use the base AAPCS variant");
3705 }
3706
3707 if (base_rules)
3708 return ARM_PCS_AAPCS;
3709 else if (user_convention)
3710 return user_pcs;
3711 else if (decl && flag_unit_at_a_time)
3712 {
3713 /* Local functions never leak outside this compilation unit,
3714 so we are free to use whatever conventions are
3715 appropriate. */
3716 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3717 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3718 if (i && i->local)
3719 return ARM_PCS_AAPCS_LOCAL;
3720 }
3721 }
3722 else if (user_convention && user_pcs != arm_pcs_default)
3723 sorry ("PCS variant");
3725 /* For everything else we use the target's default. */
3726 return arm_pcs_default;
3727 }
3730 static void
3731 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3732 const_tree fntype ATTRIBUTE_UNUSED,
3733 rtx libcall ATTRIBUTE_UNUSED,
3734 const_tree fndecl ATTRIBUTE_UNUSED)
3735 {
3736 /* Record the unallocated VFP registers. */
3737 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3738 pcum->aapcs_vfp_reg_alloc = 0;
3739 }
3741 /* Walk down the type tree of TYPE counting consecutive base elements.
3742 If *MODEP is VOIDmode, then set it to the first valid floating point
3743 type. If a non-floating point type is found, or if a floating point
3744 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3745 otherwise return the count in the sub-tree. */
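/* For example (illustrative, not from the original sources): for
   struct { double x, y; } this returns 2 with *MODEP set to DFmode,
   whereas struct { double d; float f; } returns -1 because the element
   modes differ. */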
3746 static int
3747 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3748 {
3749 enum machine_mode mode;
3752 switch (TREE_CODE (type))
3755 mode = TYPE_MODE (type);
3756 if (mode != DFmode && mode != SFmode)
3759 if (*modep == VOIDmode)
3768 mode = TYPE_MODE (TREE_TYPE (type));
3769 if (mode != DFmode && mode != SFmode)
3772 if (*modep == VOIDmode)
3781 /* Use V2SImode and V4SImode as representatives of all 64-bit
3782 and 128-bit vector types, whether or not those modes are
3783 supported with the present options. */
3784 size = int_size_in_bytes (type);
3797 if (*modep == VOIDmode)
3800 /* Vector modes are considered to be opaque: two vectors are
3801 equivalent for the purposes of being homogeneous aggregates
3802 if they are the same size. */
3811 tree index = TYPE_DOMAIN (type);
3813 /* Can't handle incomplete types. */
3814 if (!COMPLETE_TYPE_P(type))
3817 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3820 || !TYPE_MAX_VALUE (index)
3821 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3822 || !TYPE_MIN_VALUE (index)
3823 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3827 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3828 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3830 /* There must be no padding. */
3831 if (!host_integerp (TYPE_SIZE (type), 1)
3832 || (tree_low_cst (TYPE_SIZE (type), 1)
3833 != count * GET_MODE_BITSIZE (*modep)))
3845 /* Can't handle incomplete types. */
3846 if (!COMPLETE_TYPE_P(type))
3849 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3851 if (TREE_CODE (field) != FIELD_DECL)
3854 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3860 /* There must be no padding. */
3861 if (!host_integerp (TYPE_SIZE (type), 1)
3862 || (tree_low_cst (TYPE_SIZE (type), 1)
3863 != count * GET_MODE_BITSIZE (*modep)))
3870 case QUAL_UNION_TYPE:
3872 /* These aren't very interesting except in a degenerate case. */
3877 /* Can't handle incomplete types. */
3878 if (!COMPLETE_TYPE_P(type))
3881 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3883 if (TREE_CODE (field) != FIELD_DECL)
3886 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3889 count = count > sub_count ? count : sub_count;
3892 /* There must be no padding. */
3893 if (!host_integerp (TYPE_SIZE (type), 1)
3894 || (tree_low_cst (TYPE_SIZE (type), 1)
3895 != count * GET_MODE_BITSIZE (*modep)))
3908 /* Return true if PCS_VARIANT should use VFP registers. */
3909 static bool
3910 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3911 {
3912 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3914 static bool seen_thumb1_vfp = false;
3916 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3918 sorry ("Thumb-1 hard-float VFP ABI");
3919 /* sorry() is not immediately fatal, so only display this once. */
3920 seen_thumb1_vfp = true;
3926 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3929 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3930 (TARGET_VFP_DOUBLE || !is_double));
3934 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3935 enum machine_mode mode, const_tree type,
3936 enum machine_mode *base_mode, int *count)
3938 enum machine_mode new_mode = VOIDmode;
3940 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3941 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3942 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3947 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3950 new_mode = (mode == DCmode ? DFmode : SFmode);
3952 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3954 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3956 if (ag_count > 0 && ag_count <= 4)
3965 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3968 *base_mode = new_mode;
3973 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3974 enum machine_mode mode, const_tree type)
3976 int count ATTRIBUTE_UNUSED;
3977 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3979 if (!use_vfp_abi (pcs_variant, false))
3981 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3986 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3989 if (!use_vfp_abi (pcum->pcs_variant, false))
3992 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3993 &pcum->aapcs_vfp_rmode,
3994 &pcum->aapcs_vfp_rcount);
3998 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3999 const_tree type ATTRIBUTE_UNUSED)
4001 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4002 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4005 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4006 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4008 pcum->aapcs_vfp_reg_alloc = mask << regno;
4009 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4012 int rcount = pcum->aapcs_vfp_rcount;
4014 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4018 /* Avoid using unsupported vector modes. */
4019 if (rmode == V2SImode)
4021 else if (rmode == V4SImode)
4028 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4029 for (i = 0; i < rcount; i++)
4031 rtx tmp = gen_rtx_REG (rmode,
4032 FIRST_VFP_REGNUM + regno + i * rshift);
4033 tmp = gen_rtx_EXPR_LIST
4035 GEN_INT (i * GET_MODE_SIZE (rmode)));
4036 XVECEXP (par, 0, i) = tmp;
4039 pcum->aapcs_reg = par;
4042 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4049 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4050 enum machine_mode mode,
4051 const_tree type ATTRIBUTE_UNUSED)
4053 if (!use_vfp_abi (pcs_variant, false))
4056 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4059 enum machine_mode ag_mode;
4064 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4069 if (ag_mode == V2SImode)
4071 else if (ag_mode == V4SImode)
4077 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4078 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4079 for (i = 0; i < count; i++)
4081 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4082 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4083 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4084 XVECEXP (par, 0, i) = tmp;
4090 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4094 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4095 enum machine_mode mode ATTRIBUTE_UNUSED,
4096 const_tree type ATTRIBUTE_UNUSED)
4098 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4099 pcum->aapcs_vfp_reg_alloc = 0;
4103 #define AAPCS_CP(X) \
4105 aapcs_ ## X ## _cum_init, \
4106 aapcs_ ## X ## _is_call_candidate, \
4107 aapcs_ ## X ## _allocate, \
4108 aapcs_ ## X ## _is_return_candidate, \
4109 aapcs_ ## X ## _allocate_return_reg, \
4110 aapcs_ ## X ## _advance \
4113 /* Table of co-processors that can be used to pass arguments in
4114 registers. Ideally no argument should be a candidate for more than
4115 one co-processor table entry, but the table is processed in order
4116 and stops after the first match. If that entry then fails to put
4117 the argument into a co-processor register, the argument will go on
4118 the stack. */
4119 typedef struct
4120 {
4121 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4122 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4124 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4125 BLKmode) is a candidate for this co-processor's registers; this
4126 function should ignore any position-dependent state in
4127 CUMULATIVE_ARGS and only use call-type dependent information. */
4128 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4130 /* Return true if the argument does get a co-processor register; it
4131 should set aapcs_reg to an RTX of the register allocated as is
4132 required for a return from FUNCTION_ARG. */
4133 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4135 /* Return true if a result of mode MODE (or type TYPE if MODE is
4136 BLKmode) can be returned in this co-processor's registers. */
4137 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4139 /* Allocate and return an RTX element to hold the return type of a
4140 call, this routine must not fail and will only be called if
4141 is_return_candidate returned true with the same parameters. */
4142 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4144 /* Finish processing this argument and prepare to start processing
4146 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4147 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4148 {
4149 AAPCS_CP(vfp)
4150 };
4151
4152 #undef AAPCS_CP
4153
4154 static int
4155 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4160 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4161 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4167 static int
4168 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4169 {
4170 /* We aren't passed a decl, so we can't check that a call is local.
4171 However, it isn't clear that that would be a win anyway, since it
4172 might limit some tail-calling opportunities. */
4173 enum arm_pcs pcs_variant;
4177 const_tree fndecl = NULL_TREE;
4179 if (TREE_CODE (fntype) == FUNCTION_DECL)
4182 fntype = TREE_TYPE (fntype);
4185 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4188 pcs_variant = arm_pcs_default;
4190 if (pcs_variant != ARM_PCS_AAPCS)
4194 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4195 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4203 static rtx
4204 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4207 /* We aren't passed a decl, so we can't check that a call is local.
4208 However, it isn't clear that that would be a win anyway, since it
4209 might limit some tail-calling opportunities. */
4210 enum arm_pcs pcs_variant;
4211 int unsignedp ATTRIBUTE_UNUSED;
4215 const_tree fndecl = NULL_TREE;
4217 if (TREE_CODE (fntype) == FUNCTION_DECL)
4220 fntype = TREE_TYPE (fntype);
4223 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4226 pcs_variant = arm_pcs_default;
4228 /* Promote integer types. */
4229 if (type && INTEGRAL_TYPE_P (type))
4230 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4232 if (pcs_variant != ARM_PCS_AAPCS)
4236 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4237 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4239 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4243 /* Promotes small structs returned in a register to full-word size
4244 for big-endian AAPCS. */
4245 if (type && arm_return_in_msb (type))
4247 HOST_WIDE_INT size = int_size_in_bytes (type);
4248 if (size % UNITS_PER_WORD != 0)
4249 {
4250 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4251 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4252 }
4253 }
4255 return gen_rtx_REG (mode, R0_REGNUM);
4258 static rtx
4259 aapcs_libcall_value (enum machine_mode mode)
4260 {
4261 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4262 }
4264 /* Lay out a function argument using the AAPCS rules. The rule
4265 numbers referred to here are those in the AAPCS. */
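/* A worked example of the rules below (editorial, not from the original
   sources): for f (int a, double b) under the base AAPCS, a goes in r0,
   leaving the NCRN at 1; b needs doubleword alignment, so rule C3 rounds
   the NCRN up to 2 and rule C4 then assigns b to r2/r3, leaving r1
   unused. */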
4266 static void
4267 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4268 tree type, int named)
4273 /* We only need to do this once per argument. */
4274 if (pcum->aapcs_arg_processed)
4277 pcum->aapcs_arg_processed = true;
4279 /* Special case: if named is false then we are handling an incoming
4280 anonymous argument which is on the stack. */
4284 /* Is this a potential co-processor register candidate? */
4285 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4287 int slot = aapcs_select_call_coproc (pcum, mode, type);
4288 pcum->aapcs_cprc_slot = slot;
4290 /* We don't have to apply any of the rules from part B of the
4291 preparation phase, these are handled elsewhere in the
4292 compiler. */
4296 /* A co-processor register candidate goes either in its own
4297 class of registers or on the stack. */
4298 if (!pcum->aapcs_cprc_failed[slot])
4300 /* C1.cp - Try to allocate the argument to co-processor
4301 registers. */
4302 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4305 /* C2.cp - Put the argument on the stack and note that we
4306 can't assign any more candidates in this slot. We also
4307 need to note that we have allocated stack space, so that
4308 we won't later try to split a non-cprc candidate between
4309 core registers and the stack. */
4310 pcum->aapcs_cprc_failed[slot] = true;
4311 pcum->can_split = false;
4314 /* We didn't get a register, so this argument goes on the
4315 stack. */
4316 gcc_assert (pcum->can_split == false);
4321 /* C3 - For double-word aligned arguments, round the NCRN up to the
4322 next even number. */
4323 ncrn = pcum->aapcs_ncrn;
4324 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4327 nregs = ARM_NUM_REGS2 (mode, type);
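/* Illustrative example of rule C3: a DImode argument arriving with
   NCRN == 1 is rounded up to NCRN == 2 and allocated to r2/r3; r1 is
   simply left unused.  */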
4329 /* Sigh, this test should really assert that nregs > 0, but a GCC
4330 extension allows empty structs and then gives them empty size; it
4331 then allows such a structure to be passed by value. For some of
4332 the code below we have to pretend that such an argument has
4333 non-zero size so that we 'locate' it correctly either in
4334 registers or on the stack. */
4335 gcc_assert (nregs >= 0);
4337 nregs2 = nregs ? nregs : 1;
4339 /* C4 - Argument fits entirely in core registers. */
4340 if (ncrn + nregs2 <= NUM_ARG_REGS)
4342 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4343 pcum->aapcs_next_ncrn = ncrn + nregs;
4347 /* C5 - Some core registers left and there are no arguments already
4348 on the stack: split this argument between the remaining core
4349 registers and the stack. */
4350 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4352 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4353 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4354 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
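/* Illustrative example of rule C5: with NCRN == 3 and a DImode
   argument (nregs == 2), the first word is allocated to r3 and
   aapcs_partial == 4 records the remaining four bytes to be passed
   on the stack.  */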
4358 /* C6 - NCRN is set to 4. */
4359 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4361 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4365 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4366 for a call to a function whose data type is FNTYPE.
4367 For a library call, FNTYPE is NULL. */
4369 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4371 tree fndecl ATTRIBUTE_UNUSED)
4373 /* Long call handling. */
4375 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4377 pcum->pcs_variant = arm_pcs_default;
4379 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4381 if (arm_libcall_uses_aapcs_base (libname))
4382 pcum->pcs_variant = ARM_PCS_AAPCS;
4384 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4385 pcum->aapcs_reg = NULL_RTX;
4386 pcum->aapcs_partial = 0;
4387 pcum->aapcs_arg_processed = false;
4388 pcum->aapcs_cprc_slot = -1;
4389 pcum->can_split = true;
4391 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4395 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4397 pcum->aapcs_cprc_failed[i] = false;
4398 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4406 /* On the ARM, the offset starts at 0. */
4408 pcum->iwmmxt_nregs = 0;
4409 pcum->can_split = true;
4411 /* Varargs vectors are treated the same as long long.
4412 named_count avoids having to change the way arm handles 'named'. */
4413 pcum->named_count = 0;
4416 if (TARGET_REALLY_IWMMXT && fntype)
4420 for (fn_arg = TYPE_ARG_TYPES (fntype);
4422 fn_arg = TREE_CHAIN (fn_arg))
4423 pcum->named_count += 1;
4425 if (! pcum->named_count)
4426 pcum->named_count = INT_MAX;
4431 /* Return true if mode/type need doubleword alignment. */
4433 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4435 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4436 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
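/* Illustrative example: with PARM_BOUNDARY == 32, DImode and DFmode
   (64-bit alignment on AAPCS targets) need doubleword alignment, as
   does any type declared with __attribute__ ((aligned (8))).  */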
4440 /* Determine where to put an argument to a function.
4441 Value is zero to push the argument on the stack,
4442 or a hard register in which to store the argument.
4444 MODE is the argument's machine mode.
4445 TYPE is the data type of the argument (as a tree).
4446 This is null for libcalls where that information may
4447 not be available.
4448 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4449 the preceding args and about the function being called.
4450 NAMED is nonzero if this argument is a named parameter
4451 (otherwise it is an extra parameter matching an ellipsis). */
4454 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4455 tree type, int named)
4459 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4460 a call insn (op3 of a call_value insn). */
4461 if (mode == VOIDmode)
4464 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4466 aapcs_layout_arg (pcum, mode, type, named);
4467 return pcum->aapcs_reg;
4470 /* Varargs vectors are treated the same as long long.
4471 named_count avoids having to change the way arm handles 'named'. */
4472 if (TARGET_IWMMXT_ABI
4473 && arm_vector_mode_supported_p (mode)
4474 && pcum->named_count > pcum->nargs + 1)
4476 if (pcum->iwmmxt_nregs <= 9)
4477 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4480 pcum->can_split = false;
4485 /* Put doubleword aligned quantities in even register pairs. */
4487 && ARM_DOUBLEWORD_ALIGN
4488 && arm_needs_doubleword_align (mode, type))
4491 /* Only allow splitting an arg between regs and memory if all preceding
4492 args were allocated to regs. For args passed by reference we only count
4493 the reference pointer. */
4494 if (pcum->can_split)
4497 nregs = ARM_NUM_REGS2 (mode, type);
4499 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4502 return gen_rtx_REG (mode, pcum->nregs);
4506 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4507 tree type, bool named)
4509 int nregs = pcum->nregs;
4511 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4513 aapcs_layout_arg (pcum, mode, type, named);
4514 return pcum->aapcs_partial;
4517 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4520 if (NUM_ARG_REGS > nregs
4521 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4523 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4529 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4530 tree type, bool named)
4532 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4534 aapcs_layout_arg (pcum, mode, type, named);
4536 if (pcum->aapcs_cprc_slot >= 0)
4538 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4540 pcum->aapcs_cprc_slot = -1;
4543 /* Generic stuff. */
4544 pcum->aapcs_arg_processed = false;
4545 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4546 pcum->aapcs_reg = NULL_RTX;
4547 pcum->aapcs_partial = 0;
4552 if (arm_vector_mode_supported_p (mode)
4553 && pcum->named_count > pcum->nargs
4554 && TARGET_IWMMXT_ABI)
4555 pcum->iwmmxt_nregs += 1;
4557 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4561 /* Variable sized types are passed by reference. This is a GCC
4562 extension to the ARM ABI. */
4565 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4566 enum machine_mode mode ATTRIBUTE_UNUSED,
4567 const_tree type, bool named ATTRIBUTE_UNUSED)
4569 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
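/* Illustrative example (user code, a GCC extension): a variably
   sized type has a non-constant TYPE_SIZE, so passing it by value
   is implemented as a pass by reference:

       void g (int n)
       {
         struct { char buf[n]; } s;
         h (s);                        s is passed by reference
       }  */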
4572 /* Encode the current state of the #pragma [no_]long_calls. */
4575 OFF, /* No #pragma [no_]long_calls is in effect. */
4576 LONG, /* #pragma long_calls is in effect. */
4577 SHORT /* #pragma no_long_calls is in effect. */
4580 static arm_pragma_enum arm_pragma_long_calls = OFF;
4583 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4585 arm_pragma_long_calls = LONG;
4589 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4591 arm_pragma_long_calls = SHORT;
4595 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4597 arm_pragma_long_calls = OFF;
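/* Illustrative use of the pragmas handled above (user code):

       #pragma long_calls
       void far_away (void);     calls use a 32-bit sequence
       #pragma no_long_calls
       void near_by (void);      calls use a plain BL
       #pragma long_calls_off
                                 back to the command-line default  */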
4600 /* Handle an attribute requiring a FUNCTION_DECL;
4601 arguments as in struct attribute_spec.handler. */
4603 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4604 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4606 if (TREE_CODE (*node) != FUNCTION_DECL)
4608 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4610 *no_add_attrs = true;
4616 /* Handle an "interrupt" or "isr" attribute;
4617 arguments as in struct attribute_spec.handler. */
4619 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4624 if (TREE_CODE (*node) != FUNCTION_DECL)
4626 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4628 *no_add_attrs = true;
4630 /* FIXME: the argument if any is checked for type attributes;
4631 should it be checked for decl ones? */
4635 if (TREE_CODE (*node) == FUNCTION_TYPE
4636 || TREE_CODE (*node) == METHOD_TYPE)
4638 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4640 warning (OPT_Wattributes, "%qE attribute ignored",
4642 *no_add_attrs = true;
4645 else if (TREE_CODE (*node) == POINTER_TYPE
4646 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4647 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4648 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4650 *node = build_variant_type_copy (*node);
4651 TREE_TYPE (*node) = build_type_attribute_variant
4653 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4654 *no_add_attrs = true;
4658 /* Possibly pass this attribute on from the type to a decl. */
4659 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4660 | (int) ATTR_FLAG_FUNCTION_NEXT
4661 | (int) ATTR_FLAG_ARRAY_NEXT))
4663 *no_add_attrs = true;
4664 return tree_cons (name, args, NULL_TREE);
4668 warning (OPT_Wattributes, "%qE attribute ignored",
4677 /* Handle a "pcs" attribute; arguments as in struct
4678 attribute_spec.handler. */
4680 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4681 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4683 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4685 warning (OPT_Wattributes, "%qE attribute ignored", name);
4686 *no_add_attrs = true;
4691 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4692 /* Handle the "notshared" attribute. This attribute is another way of
4693 requesting hidden visibility. ARM's compiler supports
4694 "__declspec(notshared)"; we support the same thing via an
4698 arm_handle_notshared_attribute (tree *node,
4699 tree name ATTRIBUTE_UNUSED,
4700 tree args ATTRIBUTE_UNUSED,
4701 int flags ATTRIBUTE_UNUSED,
4704 tree decl = TYPE_NAME (*node);
4708 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4709 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4710 *no_add_attrs = false;
4716 /* Return 0 if the attributes for two types are incompatible, 1 if they
4717 are compatible, and 2 if they are nearly compatible (which causes a
4718 warning to be generated). */
4720 arm_comp_type_attributes (const_tree type1, const_tree type2)
4724 /* Check for mismatch of non-default calling convention. */
4725 if (TREE_CODE (type1) != FUNCTION_TYPE)
4728 /* Check for mismatched call attributes. */
4729 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4730 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4731 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4732 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4734 /* Only bother to check if an attribute is defined. */
4735 if (l1 | l2 | s1 | s2)
4737 /* If one type has an attribute, the other must have the same attribute. */
4738 if ((l1 != l2) || (s1 != s2))
4741 /* Disallow mixed attributes. */
4742 if ((l1 & s2) || (l2 & s1))
4746 /* Check for mismatched ISR attribute. */
4747 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4749 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4750 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4752 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4759 /* Assigns default attributes to a newly defined type. This is used to
4760 set short_call/long_call attributes for function types of
4761 functions defined inside corresponding #pragma scopes. */
4763 arm_set_default_type_attributes (tree type)
4765 /* Add __attribute__ ((long_call)) to all functions when
4766 inside #pragma long_calls, or __attribute__ ((short_call))
4767 when inside #pragma no_long_calls. */
4768 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4770 tree type_attr_list, attr_name;
4771 type_attr_list = TYPE_ATTRIBUTES (type);
4773 if (arm_pragma_long_calls == LONG)
4774 attr_name = get_identifier ("long_call");
4775 else if (arm_pragma_long_calls == SHORT)
4776 attr_name = get_identifier ("short_call");
4780 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4781 TYPE_ATTRIBUTES (type) = type_attr_list;
4785 /* Return true if DECL is known to be linked into section SECTION. */
4788 arm_function_in_section_p (tree decl, section *section)
4790 /* We can only be certain about functions defined in the same
4791 compilation unit. */
4792 if (!TREE_STATIC (decl))
4795 /* Make sure that SYMBOL always binds to the definition in this
4796 compilation unit. */
4797 if (!targetm.binds_local_p (decl))
4800 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4801 if (!DECL_SECTION_NAME (decl))
4803 /* Make sure that we will not create a unique section for DECL. */
4804 if (flag_function_sections || DECL_ONE_ONLY (decl))
4808 return function_section (decl) == section;
4811 /* Return nonzero if a 32-bit "long_call" should be generated for
4812 a call from the current function to DECL. We generate a long_call
4815 a. has an __attribute__ ((long_call))
4816 or b. is within the scope of a #pragma long_calls
4817 or c. the -mlong-calls command line switch has been specified
4819 However we do not generate a long call if the function:
4821 d. has an __attribute__ ((short_call))
4822 or e. is inside the scope of a #pragma no_long_calls
4823 or f. is defined in the same section as the current function. */
4826 arm_is_long_call_p (tree decl)
4831 return TARGET_LONG_CALLS;
4833 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4834 if (lookup_attribute ("short_call", attrs))
4837 /* For "f", be conservative, and only cater for cases in which the
4838 whole of the current function is placed in the same section. */
4839 if (!flag_reorder_blocks_and_partition
4840 && TREE_CODE (decl) == FUNCTION_DECL
4841 && arm_function_in_section_p (decl, current_function_section ()))
4844 if (lookup_attribute ("long_call", attrs))
4847 return TARGET_LONG_CALLS;
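/* Illustrative example of case "a" above: even without -mlong-calls,

       extern void flash_write (int) __attribute__ ((long_call));

   forces calls to flash_write through a register, whereas a
   short_call attribute, or a callee known to be in the current
   section (case "f"), keeps the plain BL form.  */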
4850 /* Return nonzero if it is ok to make a tail-call to DECL. */
4852 arm_function_ok_for_sibcall (tree decl, tree exp)
4854 unsigned long func_type;
4856 if (cfun->machine->sibcall_blocked)
4859 /* Never tailcall something for which we have no decl, or if we
4860 are generating code for Thumb-1. */
4861 if (decl == NULL || TARGET_THUMB1)
4864 /* The PIC register is live on entry to VxWorks PLT entries, so we
4865 must make the call before restoring the PIC register. */
4866 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4869 /* Cannot tail-call to long calls, since these are out of range of
4870 a branch instruction. */
4871 if (arm_is_long_call_p (decl))
4874 /* If we are interworking and the function is not declared static
4875 then we can't tail-call it unless we know that it exists in this
4876 compilation unit (since it might be a Thumb routine). */
4877 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4880 func_type = arm_current_func_type ();
4881 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4882 if (IS_INTERRUPT (func_type))
4885 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4887 /* Check that the return value locations are the same. For
4888 example that we aren't returning a value from the sibling in
4889 a VFP register but then need to transfer it to a core
4893 a = arm_function_value (TREE_TYPE (exp), decl, false);
4894 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4896 if (!rtx_equal_p (a, b))
4900 /* Never tailcall if function may be called with a misaligned SP. */
4901 if (IS_STACKALIGN (func_type))
4904 /* Everything else is ok. */
4909 /* Addressing mode support functions. */
4911 /* Return nonzero if X is a legitimate immediate operand when compiling
4912 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4914 legitimate_pic_operand_p (rtx x)
4916 if (GET_CODE (x) == SYMBOL_REF
4917 || (GET_CODE (x) == CONST
4918 && GET_CODE (XEXP (x, 0)) == PLUS
4919 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4925 /* Record that the current function needs a PIC register. Initialize
4926 cfun->machine->pic_reg if we have not already done so. */
4929 require_pic_register (void)
4931 /* A lot of the logic here is made obscure by the fact that this
4932 routine gets called as part of the rtx cost estimation process.
4933 We don't want those calls to affect any assumptions about the real
4934 function; and further, we can't call entry_of_function() until we
4935 start the real expansion process. */
4936 if (!crtl->uses_pic_offset_table)
4938 gcc_assert (can_create_pseudo_p ());
4939 if (arm_pic_register != INVALID_REGNUM)
4941 if (!cfun->machine->pic_reg)
4942 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4944 /* Play games to avoid marking the function as needing pic
4945 if we are being called as part of the cost-estimation
4946 process. */
4947 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4948 crtl->uses_pic_offset_table = 1;
4954 if (!cfun->machine->pic_reg)
4955 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4957 /* Play games to avoid marking the function as needing pic
4958 if we are being called as part of the cost-estimation
4959 process. */
4960 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4962 crtl->uses_pic_offset_table = 1;
4965 arm_load_pic_register (0UL);
4969 /* We can be called during expansion of PHI nodes, where
4970 we can't yet emit instructions directly in the final
4971 insn stream. Queue the insns on the entry edge, they will
4972 be committed after everything else is expanded. */
4973 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4980 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4982 if (GET_CODE (orig) == SYMBOL_REF
4983 || GET_CODE (orig) == LABEL_REF)
4989 gcc_assert (can_create_pseudo_p ());
4990 reg = gen_reg_rtx (Pmode);
4993 /* VxWorks does not impose a fixed gap between segments; the run-time
4994 gap can be different from the object-file gap. We therefore can't
4995 use GOTOFF unless we are absolutely sure that the symbol is in the
4996 same segment as the GOT. Unfortunately, the flexibility of linker
4997 scripts means that we can't be sure of that in general, so assume
4998 that GOTOFF is never valid on VxWorks. */
4999 if ((GET_CODE (orig) == LABEL_REF
5000 || (GET_CODE (orig) == SYMBOL_REF &&
5001 SYMBOL_REF_LOCAL_P (orig)))
5003 && !TARGET_VXWORKS_RTP)
5004 insn = arm_pic_static_addr (orig, reg);
5010 /* If this function doesn't have a pic register, create one now. */
5011 require_pic_register ();
5013 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5015 /* Make the MEM as close to a constant as possible. */
5016 mem = SET_SRC (pat);
5017 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5018 MEM_READONLY_P (mem) = 1;
5019 MEM_NOTRAP_P (mem) = 1;
5021 insn = emit_insn (pat);
5024 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5026 set_unique_reg_note (insn, REG_EQUAL, orig);
5030 else if (GET_CODE (orig) == CONST)
5034 if (GET_CODE (XEXP (orig, 0)) == PLUS
5035 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5038 /* Handle the case where we have: const (UNSPEC_TLS). */
5039 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5040 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5043 /* Handle the case where we have:
5044 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5045 CONST_INT. */
5046 if (GET_CODE (XEXP (orig, 0)) == PLUS
5047 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5048 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5050 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5056 gcc_assert (can_create_pseudo_p ());
5057 reg = gen_reg_rtx (Pmode);
5060 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5062 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5063 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5064 base == reg ? 0 : reg);
5066 if (GET_CODE (offset) == CONST_INT)
5068 /* The base register doesn't really matter, we only want to
5069 test the index for the appropriate mode. */
5070 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5072 gcc_assert (can_create_pseudo_p ());
5073 offset = force_reg (Pmode, offset);
5076 if (GET_CODE (offset) == CONST_INT)
5077 return plus_constant (base, INTVAL (offset));
5080 if (GET_MODE_SIZE (mode) > 4
5081 && (GET_MODE_CLASS (mode) == MODE_INT
5082 || TARGET_SOFT_FLOAT))
5084 emit_insn (gen_addsi3 (reg, base, offset));
5088 return gen_rtx_PLUS (Pmode, base, offset);
5095 /* Find a spare register to use during the prolog of a function. */
5098 thumb_find_work_register (unsigned long pushed_regs_mask)
5102 /* Check the argument registers first as these are call-used. The
5103 register allocation order means that sometimes r3 might be used
5104 but earlier argument registers might not, so check them all. */
5105 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5106 if (!df_regs_ever_live_p (reg))
5109 /* Before going on to check the call-saved registers we can try a couple
5110 more ways of deducing that r3 is available. The first is when we are
5111 pushing anonymous arguments onto the stack and we have less than 4
5112 registers worth of fixed arguments(*). In this case r3 will be part of
5113 the variable argument list and so we can be sure that it will be
5114 pushed right at the start of the function. Hence it will be available
5115 for the rest of the prologue.
5116 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5117 if (cfun->machine->uses_anonymous_args
5118 && crtl->args.pretend_args_size > 0)
5119 return LAST_ARG_REGNUM;
5121 /* The other case is when we have fixed arguments but less than 4 registers
5122 worth. In this case r3 might be used in the body of the function, but
5123 it is not being used to convey an argument into the function. In theory
5124 we could just check crtl->args.size to see how many bytes are
5125 being passed in argument registers, but it seems that it is unreliable.
5126 Sometimes it will have the value 0 when in fact arguments are being
5127 passed. (See testcase execute/20021111-1.c for an example). So we also
5128 check the args_info.nregs field as well. The problem with this field is
5129 that it makes no allowances for arguments that are passed to the
5130 function but which are not used. Hence we could miss an opportunity
5131 when a function has an unused argument in r3. But it is better to be
5132 safe than to be sorry. */
5133 if (! cfun->machine->uses_anonymous_args
5134 && crtl->args.size >= 0
5135 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5136 && crtl->args.info.nregs < 4)
5137 return LAST_ARG_REGNUM;
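/* Illustrative example: for 'void f (int a, int b)' only r0 and r1
   carry arguments, so crtl->args.info.nregs == 2 and r3 is known to
   be free for use as a work register even though it is not pushed.  */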
5139 /* Otherwise look for a call-saved register that is going to be pushed. */
5140 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5141 if (pushed_regs_mask & (1 << reg))
5146 /* Thumb-2 can use high regs. */
5147 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5148 if (pushed_regs_mask & (1 << reg))
5151 /* Something went wrong - thumb_compute_save_reg_mask()
5152 should have arranged for a suitable register to be pushed. */
5156 static GTY(()) int pic_labelno;
5158 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5159 low register. */
5162 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5164 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5166 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5169 gcc_assert (flag_pic);
5171 pic_reg = cfun->machine->pic_reg;
5172 if (TARGET_VXWORKS_RTP)
5174 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5175 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5176 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5178 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5180 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5181 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5185 /* We use an UNSPEC rather than a LABEL_REF because this label
5186 never appears in the code stream. */
5188 labelno = GEN_INT (pic_labelno++);
5189 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5190 l1 = gen_rtx_CONST (VOIDmode, l1);
5192 /* On the ARM the PC register contains 'dot + 8' at the time of the
5193 addition, on the Thumb it is 'dot + 4'. */
5194 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5195 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5197 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5201 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5203 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5205 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5207 else /* TARGET_THUMB1 */
5209 if (arm_pic_register != INVALID_REGNUM
5210 && REGNO (pic_reg) > LAST_LO_REGNUM)
5212 /* We will have pushed the pic register, so we should always be
5213 able to find a work register. */
5214 pic_tmp = gen_rtx_REG (SImode,
5215 thumb_find_work_register (saved_regs));
5216 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5217 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5220 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5221 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5225 /* Need to emit this whether or not we obey regdecls,
5226 since setjmp/longjmp can cause life info to screw up. */
5230 /* Generate code to load the address of a static var when flag_pic is set. */
5232 arm_pic_static_addr (rtx orig, rtx reg)
5234 rtx l1, labelno, offset_rtx, insn;
5236 gcc_assert (flag_pic);
5238 /* We use an UNSPEC rather than a LABEL_REF because this label
5239 never appears in the code stream. */
5240 labelno = GEN_INT (pic_labelno++);
5241 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5242 l1 = gen_rtx_CONST (VOIDmode, l1);
5244 /* On the ARM the PC register contains 'dot + 8' at the time of the
5245 addition, on the Thumb it is 'dot + 4'. */
5246 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5247 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5248 UNSPEC_SYMBOL_OFFSET);
5249 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5253 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5255 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5257 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5259 else /* TARGET_THUMB1 */
5261 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5262 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
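/* Illustrative ARM-state expansion of the load/add pair emitted
   above (a sketch; register numbers are arbitrary):

       ldr     r4, .LCP
   .LPIC0:
       add     r4, pc            pc reads as .LPIC0 + 8 in ARM state
       ...
   .LCP:
       .word   <target> - (.LPIC0 + 8)

   The 'dot + 8' (Thumb: 'dot + 4') folded into the constant cancels
   the pipeline offset, leaving r4 == <target>.  */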
5268 /* Return nonzero if X is valid as an ARM state addressing register. */
5270 arm_address_register_rtx_p (rtx x, int strict_p)
5274 if (GET_CODE (x) != REG)
5280 return ARM_REGNO_OK_FOR_BASE_P (regno);
5282 return (regno <= LAST_ARM_REGNUM
5283 || regno >= FIRST_PSEUDO_REGISTER
5284 || regno == FRAME_POINTER_REGNUM
5285 || regno == ARG_POINTER_REGNUM);
5288 /* Return TRUE if this rtx is the difference of a symbol and a label,
5289 and will reduce to a PC-relative relocation in the object file.
5290 Expressions like this can be left alone when generating PIC, rather
5291 than forced through the GOT. */
5293 pcrel_constant_p (rtx x)
5295 if (GET_CODE (x) == MINUS)
5296 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
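/* Illustrative example: (minus (symbol_ref "sym") (label_ref L))
   assembles to a PC-relative difference, so even under -fpic it can
   be used directly rather than being forced through the GOT.  */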
5301 /* Return true if X will surely end up in an index register after next
5302 splitting pass. */
5304 will_be_in_index_register (const_rtx x)
5306 /* arm.md: calculate_pic_address will split this into a register. */
5307 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5310 /* Return nonzero if X is a valid ARM state address operand. */
5312 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5316 enum rtx_code code = GET_CODE (x);
5318 if (arm_address_register_rtx_p (x, strict_p))
5321 use_ldrd = (TARGET_LDRD
5323 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5325 if (code == POST_INC || code == PRE_DEC
5326 || ((code == PRE_INC || code == POST_DEC)
5327 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5328 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5330 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5331 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5332 && GET_CODE (XEXP (x, 1)) == PLUS
5333 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5335 rtx addend = XEXP (XEXP (x, 1), 1);
5337 /* Don't allow ldrd post increment by register because it's hard
5338 to fixup invalid register choices. */
5340 && GET_CODE (x) == POST_MODIFY
5341 && GET_CODE (addend) == REG)
5344 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5345 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5348 /* After reload constants split into minipools will have addresses
5349 from a LABEL_REF. */
5350 else if (reload_completed
5351 && (code == LABEL_REF
5353 && GET_CODE (XEXP (x, 0)) == PLUS
5354 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5355 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5358 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5361 else if (code == PLUS)
5363 rtx xop0 = XEXP (x, 0);
5364 rtx xop1 = XEXP (x, 1);
5366 return ((arm_address_register_rtx_p (xop0, strict_p)
5367 && ((GET_CODE(xop1) == CONST_INT
5368 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5369 || (!strict_p && will_be_in_index_register (xop1))))
5370 || (arm_address_register_rtx_p (xop1, strict_p)
5371 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5375 /* Reload currently can't handle MINUS, so disable this for now */
5376 else if (GET_CODE (x) == MINUS)
5378 rtx xop0 = XEXP (x, 0);
5379 rtx xop1 = XEXP (x, 1);
5381 return (arm_address_register_rtx_p (xop0, strict_p)
5382 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5386 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5387 && code == SYMBOL_REF
5388 && CONSTANT_POOL_ADDRESS_P (x)
5390 && symbol_mentioned_p (get_pool_constant (x))
5391 && ! pcrel_constant_p (get_pool_constant (x))))
5397 /* Return nonzero if X is a valid Thumb-2 address operand. */
5399 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5402 enum rtx_code code = GET_CODE (x);
5404 if (arm_address_register_rtx_p (x, strict_p))
5407 use_ldrd = (TARGET_LDRD
5409 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5411 if (code == POST_INC || code == PRE_DEC
5412 || ((code == PRE_INC || code == POST_DEC)
5413 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5414 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5416 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5417 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5418 && GET_CODE (XEXP (x, 1)) == PLUS
5419 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5421 /* Thumb-2 only has autoincrement by constant. */
5422 rtx addend = XEXP (XEXP (x, 1), 1);
5423 HOST_WIDE_INT offset;
5425 if (GET_CODE (addend) != CONST_INT)
5428 offset = INTVAL (addend);
5429 if (GET_MODE_SIZE (mode) <= 4)
5430 return (offset > -256 && offset < 256);
5432 return (use_ldrd && offset > -1024 && offset < 1024
5433 && (offset & 3) == 0);
5436 /* After reload constants split into minipools will have addresses
5437 from a LABEL_REF. */
5438 else if (reload_completed
5439 && (code == LABEL_REF
5441 && GET_CODE (XEXP (x, 0)) == PLUS
5442 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5443 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5446 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5449 else if (code == PLUS)
5451 rtx xop0 = XEXP (x, 0);
5452 rtx xop1 = XEXP (x, 1);
5454 return ((arm_address_register_rtx_p (xop0, strict_p)
5455 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5456 || (!strict_p && will_be_in_index_register (xop1))))
5457 || (arm_address_register_rtx_p (xop1, strict_p)
5458 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5461 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5462 && code == SYMBOL_REF
5463 && CONSTANT_POOL_ADDRESS_P (x)
5465 && symbol_mentioned_p (get_pool_constant (x))
5466 && ! pcrel_constant_p (get_pool_constant (x))))
5472 /* Return nonzero if INDEX is valid for an address index operand in
5475 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5478 HOST_WIDE_INT range;
5479 enum rtx_code code = GET_CODE (index);
5481 /* Standard coprocessor addressing modes. */
5482 if (TARGET_HARD_FLOAT
5483 && (TARGET_FPA || TARGET_MAVERICK)
5484 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5485 || (TARGET_MAVERICK && mode == DImode)))
5486 return (code == CONST_INT && INTVAL (index) < 1024
5487 && INTVAL (index) > -1024
5488 && (INTVAL (index) & 3) == 0);
5491 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5492 return (code == CONST_INT
5493 && INTVAL (index) < 1016
5494 && INTVAL (index) > -1024
5495 && (INTVAL (index) & 3) == 0);
5497 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5498 return (code == CONST_INT
5499 && INTVAL (index) < 1024
5500 && INTVAL (index) > -1024
5501 && (INTVAL (index) & 3) == 0);
5503 if (arm_address_register_rtx_p (index, strict_p)
5504 && (GET_MODE_SIZE (mode) <= 4))
5507 if (mode == DImode || mode == DFmode)
5509 if (code == CONST_INT)
5511 HOST_WIDE_INT val = INTVAL (index);
5514 return val > -256 && val < 256;
5516 return val > -4096 && val < 4092;
5519 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
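/* Illustrative DImode offsets: [rN, #252] is acceptable both with
   and without LDRD, while [rN, #4088] is only reachable when the
   access can be split into a pair of LDRs, hence the wider +/-4095
   range when TARGET_LDRD is not set.  */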
5522 if (GET_MODE_SIZE (mode) <= 4
5526 || (mode == QImode && outer == SIGN_EXTEND))))
5530 rtx xiop0 = XEXP (index, 0);
5531 rtx xiop1 = XEXP (index, 1);
5533 return ((arm_address_register_rtx_p (xiop0, strict_p)
5534 && power_of_two_operand (xiop1, SImode))
5535 || (arm_address_register_rtx_p (xiop1, strict_p)
5536 && power_of_two_operand (xiop0, SImode)));
5538 else if (code == LSHIFTRT || code == ASHIFTRT
5539 || code == ASHIFT || code == ROTATERT)
5541 rtx op = XEXP (index, 1);
5543 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5544 && GET_CODE (op) == CONST_INT
5546 && INTVAL (op) <= 31);
5550 /* For ARM v4 we may be doing a sign-extend operation during the
5551 load. */
5556 || (outer == SIGN_EXTEND && mode == QImode))
5562 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5564 return (code == CONST_INT
5565 && INTVAL (index) < range
5566 && INTVAL (index) > -range);
5569 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5570 index operand. i.e. 1, 2, 4 or 8. */
5572 thumb2_index_mul_operand (rtx op)
5576 if (GET_CODE (op) != CONST_INT
5580 return (val == 1 || val == 2 || val == 4 || val == 8);
5583 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5585 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5587 enum rtx_code code = GET_CODE (index);
5589 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5590 /* Standard coprocessor addressing modes. */
5591 if (TARGET_HARD_FLOAT
5592 && (TARGET_FPA || TARGET_MAVERICK)
5593 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5594 || (TARGET_MAVERICK && mode == DImode)))
5595 return (code == CONST_INT && INTVAL (index) < 1024
5596 && INTVAL (index) > -1024
5597 && (INTVAL (index) & 3) == 0);
5599 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5601 /* For DImode assume values will usually live in core regs
5602 and only allow LDRD addressing modes. */
5603 if (!TARGET_LDRD || mode != DImode)
5604 return (code == CONST_INT
5605 && INTVAL (index) < 1024
5606 && INTVAL (index) > -1024
5607 && (INTVAL (index) & 3) == 0);
5611 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5612 return (code == CONST_INT
5613 && INTVAL (index) < 1016
5614 && INTVAL (index) > -1024
5615 && (INTVAL (index) & 3) == 0);
5617 if (arm_address_register_rtx_p (index, strict_p)
5618 && (GET_MODE_SIZE (mode) <= 4))
5621 if (mode == DImode || mode == DFmode)
5623 if (code == CONST_INT)
5625 HOST_WIDE_INT val = INTVAL (index);
5626 /* ??? Can we assume ldrd for thumb2? */
5627 /* Thumb-2 ldrd only has reg+const addressing modes. */
5628 /* ldrd supports offsets of +-1020.
5629 However the ldr fallback does not. */
5630 return val > -256 && val < 256 && (val & 3) == 0;
5638 rtx xiop0 = XEXP (index, 0);
5639 rtx xiop1 = XEXP (index, 1);
5641 return ((arm_address_register_rtx_p (xiop0, strict_p)
5642 && thumb2_index_mul_operand (xiop1))
5643 || (arm_address_register_rtx_p (xiop1, strict_p)
5644 && thumb2_index_mul_operand (xiop0)));
5646 else if (code == ASHIFT)
5648 rtx op = XEXP (index, 1);
5650 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5651 && GET_CODE (op) == CONST_INT
5653 && INTVAL (op) <= 3);
5656 return (code == CONST_INT
5657 && INTVAL (index) < 4096
5658 && INTVAL (index) > -256);
5661 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5663 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5667 if (GET_CODE (x) != REG)
5673 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5675 return (regno <= LAST_LO_REGNUM
5676 || regno > LAST_VIRTUAL_REGISTER
5677 || regno == FRAME_POINTER_REGNUM
5678 || (GET_MODE_SIZE (mode) >= 4
5679 && (regno == STACK_POINTER_REGNUM
5680 || regno >= FIRST_PSEUDO_REGISTER
5681 || x == hard_frame_pointer_rtx
5682 || x == arg_pointer_rtx)));
5685 /* Return nonzero if x is a legitimate index register. This is the case
5686 for any base register that can access a QImode object. */
5688 thumb1_index_register_rtx_p (rtx x, int strict_p)
5690 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5693 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5695 The AP may be eliminated to either the SP or the FP, so we use the
5696 least common denominator, e.g. SImode, and offsets from 0 to 64.
5698 ??? Verify whether the above is the right approach.
5700 ??? Also, the FP may be eliminated to the SP, so perhaps that
5701 needs special handling also.
5703 ??? Look at how the mips16 port solves this problem. It probably uses
5704 better ways to solve some of these problems.
5706 Although it is not incorrect, we don't accept QImode and HImode
5707 addresses based on the frame pointer or arg pointer until the
5708 reload pass starts. This is so that eliminating such addresses
5709 into stack based ones won't produce impossible code. */
5711 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5713 /* ??? Not clear if this is right. Experiment. */
5714 if (GET_MODE_SIZE (mode) < 4
5715 && !(reload_in_progress || reload_completed)
5716 && (reg_mentioned_p (frame_pointer_rtx, x)
5717 || reg_mentioned_p (arg_pointer_rtx, x)
5718 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5719 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5720 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5721 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5724 /* Accept any base register. SP only in SImode or larger. */
5725 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5728 /* This is PC relative data before arm_reorg runs. */
5729 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5730 && GET_CODE (x) == SYMBOL_REF
5731 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5734 /* This is PC relative data after arm_reorg runs. */
5735 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5737 && (GET_CODE (x) == LABEL_REF
5738 || (GET_CODE (x) == CONST
5739 && GET_CODE (XEXP (x, 0)) == PLUS
5740 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5741 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5744 /* Post-inc indexing only supported for SImode and larger. */
5745 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5746 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5749 else if (GET_CODE (x) == PLUS)
5751 /* REG+REG address can be any two index registers. */
5752 /* We disallow FRAME+REG addressing since we know that FRAME
5753 will be replaced with STACK, and SP relative addressing only
5754 permits SP+OFFSET. */
5755 if (GET_MODE_SIZE (mode) <= 4
5756 && XEXP (x, 0) != frame_pointer_rtx
5757 && XEXP (x, 1) != frame_pointer_rtx
5758 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5759 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5760 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5763 /* REG+const has 5-7 bit offset for non-SP registers. */
5764 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5765 || XEXP (x, 0) == arg_pointer_rtx)
5766 && GET_CODE (XEXP (x, 1)) == CONST_INT
5767 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5770 /* REG+const has 10-bit offset for SP, but only SImode and
5771 larger are supported. */
5772 /* ??? Should probably check for DI/DFmode overflow here
5773 just like GO_IF_LEGITIMATE_OFFSET does. */
5774 else if (GET_CODE (XEXP (x, 0)) == REG
5775 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5776 && GET_MODE_SIZE (mode) >= 4
5777 && GET_CODE (XEXP (x, 1)) == CONST_INT
5778 && INTVAL (XEXP (x, 1)) >= 0
5779 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5780 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5783 else if (GET_CODE (XEXP (x, 0)) == REG
5784 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5785 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5786 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5787 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5788 && GET_MODE_SIZE (mode) >= 4
5789 && GET_CODE (XEXP (x, 1)) == CONST_INT
5790 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5794 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5795 && GET_MODE_SIZE (mode) == 4
5796 && GET_CODE (x) == SYMBOL_REF
5797 && CONSTANT_POOL_ADDRESS_P (x)
5799 && symbol_mentioned_p (get_pool_constant (x))
5800 && ! pcrel_constant_p (get_pool_constant (x))))
5806 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5807 instruction of mode MODE. */
5809 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5811 switch (GET_MODE_SIZE (mode))
5814 return val >= 0 && val < 32;
5817 return val >= 0 && val < 64 && (val & 1) == 0;
5821 && (val + GET_MODE_SIZE (mode)) <= 128
5827 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5830 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5831 else if (TARGET_THUMB2)
5832 return thumb2_legitimate_address_p (mode, x, strict_p);
5833 else /* if (TARGET_THUMB1) */
5834 return thumb1_legitimate_address_p (mode, x, strict_p);
5837 /* Build the SYMBOL_REF for __tls_get_addr. */
5839 static GTY(()) rtx tls_get_addr_libfunc;
5842 get_tls_get_addr (void)
5844 if (!tls_get_addr_libfunc)
5845 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5846 return tls_get_addr_libfunc;
5850 arm_load_tp (rtx target)
5853 target = gen_reg_rtx (SImode);
5857 /* Can return in any reg. */
5858 emit_insn (gen_load_tp_hard (target));
5862 /* Always returned in r0. Immediately copy the result into a pseudo,
5863 otherwise other uses of r0 (e.g. setting up function arguments) may
5864 clobber the value. */
5868 emit_insn (gen_load_tp_soft ());
5870 tmp = gen_rtx_REG (SImode, 0);
5871 emit_move_insn (target, tmp);
5877 load_tls_operand (rtx x, rtx reg)
5881 if (reg == NULL_RTX)
5882 reg = gen_reg_rtx (SImode);
5884 tmp = gen_rtx_CONST (SImode, x);
5886 emit_move_insn (reg, tmp);
5892 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5894 rtx insns, label, labelno, sum;
5898 labelno = GEN_INT (pic_labelno++);
5899 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5900 label = gen_rtx_CONST (VOIDmode, label);
5902 sum = gen_rtx_UNSPEC (Pmode,
5903 gen_rtvec (4, x, GEN_INT (reloc), label,
5904 GEN_INT (TARGET_ARM ? 8 : 4)),
5906 reg = load_tls_operand (sum, reg);
5909 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5910 else if (TARGET_THUMB2)
5911 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5912 else /* TARGET_THUMB1 */
5913 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5915 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5916 Pmode, 1, reg, Pmode);
5918 insns = get_insns ();
5925 legitimize_tls_address (rtx x, rtx reg)
5927 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5928 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5932 case TLS_MODEL_GLOBAL_DYNAMIC:
5933 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5934 dest = gen_reg_rtx (Pmode);
5935 emit_libcall_block (insns, dest, ret, x);
5938 case TLS_MODEL_LOCAL_DYNAMIC:
5939 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5941 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5942 share the LDM result with other LD model accesses. */
5943 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5945 dest = gen_reg_rtx (Pmode);
5946 emit_libcall_block (insns, dest, ret, eqv);
5948 /* Load the addend. */
5949 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5951 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5952 return gen_rtx_PLUS (Pmode, dest, addend);
5954 case TLS_MODEL_INITIAL_EXEC:
5955 labelno = GEN_INT (pic_labelno++);
5956 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5957 label = gen_rtx_CONST (VOIDmode, label);
5958 sum = gen_rtx_UNSPEC (Pmode,
5959 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5960 GEN_INT (TARGET_ARM ? 8 : 4)),
5962 reg = load_tls_operand (sum, reg);
5965 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5966 else if (TARGET_THUMB2)
5967 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5970 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5971 emit_move_insn (reg, gen_const_mem (SImode, reg));
5974 tp = arm_load_tp (NULL_RTX);
5976 return gen_rtx_PLUS (Pmode, tp, reg);
5978 case TLS_MODEL_LOCAL_EXEC:
5979 tp = arm_load_tp (NULL_RTX);
5981 reg = gen_rtx_UNSPEC (Pmode,
5982 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5984 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5986 return gen_rtx_PLUS (Pmode, tp, reg);
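/* Illustrative local-exec sequence with a hardware thread register
   (a sketch of what the PLUS built above becomes):

       mrc     p15, 0, r0, c13, c0, 3    r0 = thread pointer
       ldr     r1, .LC0                  r1 = TPOFF of `x'
       add     r0, r0, r1                r0 = &x
       ...
   .LC0:
       .word   x(tpoff)  */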
5993 /* Try machine-dependent ways of modifying an illegitimate address
5994 to be legitimate. If we find one, return the new, valid address. */
5996 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6000 /* TODO: legitimize_address for Thumb2. */
6003 return thumb_legitimize_address (x, orig_x, mode);
6006 if (arm_tls_symbol_p (x))
6007 return legitimize_tls_address (x, NULL_RTX);
6009 if (GET_CODE (x) == PLUS)
6011 rtx xop0 = XEXP (x, 0);
6012 rtx xop1 = XEXP (x, 1);
6014 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6015 xop0 = force_reg (SImode, xop0);
6017 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6018 xop1 = force_reg (SImode, xop1);
6020 if (ARM_BASE_REGISTER_RTX_P (xop0)
6021 && GET_CODE (xop1) == CONST_INT)
6023 HOST_WIDE_INT n, low_n;
6027 /* VFP addressing modes actually allow greater offsets, but for
6028 now we just stick with the lowest common denominator. */
6030 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6042 low_n = ((mode) == TImode ? 0
6043 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6047 base_reg = gen_reg_rtx (SImode);
6048 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6049 emit_move_insn (base_reg, val);
6050 x = plus_constant (base_reg, low_n);
6052 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6053 x = gen_rtx_PLUS (SImode, xop0, xop1);
6056 /* XXX We don't allow MINUS any more -- see comment in
6057 arm_legitimate_address_outer_p (). */
6058 else if (GET_CODE (x) == MINUS)
6060 rtx xop0 = XEXP (x, 0);
6061 rtx xop1 = XEXP (x, 1);
6063 if (CONSTANT_P (xop0))
6064 xop0 = force_reg (SImode, xop0);
6066 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6067 xop1 = force_reg (SImode, xop1);
6069 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6070 x = gen_rtx_MINUS (SImode, xop0, xop1);
6073 /* Make sure to take full advantage of the pre-indexed addressing mode
6074 with absolute addresses which often allows for the base register to
6075 be factorized for multiple adjacent memory references, and it might
6076 even allow for the mini pool to be avoided entirely. */
6077 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6080 HOST_WIDE_INT mask, base, index;
6083 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6084 use an 8-bit index. So let's use a 12-bit index for SImode only and
6085 hope that arm_gen_constant will enable ldrb to use more bits. */
6086 bits = (mode == SImode) ? 12 : 8;
6087 mask = (1 << bits) - 1;
6088 base = INTVAL (x) & ~mask;
6089 index = INTVAL (x) & mask;
6090 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6092 /* It'll most probably be more efficient to generate the base
6093 with more bits set and use a negative index instead. */
6097 base_reg = force_reg (SImode, GEN_INT (base));
6098 x = plus_constant (base_reg, index);
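/* Illustrative split for SImode with x == 0x3457: bits == 12,
   mask == 0xfff, base == 0x3000, index == 0x457.  The base constant
   is loaded into a register once and the access becomes
   [rbase, #0x457], so neighbouring absolute addresses can share the
   same base register.  */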
6103 /* We need to find and carefully transform any SYMBOL and LABEL
6104 references; so go back to the original address expression. */
6105 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6107 if (new_x != orig_x)
6115 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6116 to be legitimate. If we find one, return the new, valid address. */
6118 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6120 if (arm_tls_symbol_p (x))
6121 return legitimize_tls_address (x, NULL_RTX);
6123 if (GET_CODE (x) == PLUS
6124 && GET_CODE (XEXP (x, 1)) == CONST_INT
6125 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6126 || INTVAL (XEXP (x, 1)) < 0))
6128 rtx xop0 = XEXP (x, 0);
6129 rtx xop1 = XEXP (x, 1);
6130 HOST_WIDE_INT offset = INTVAL (xop1);
6132 /* Try and fold the offset into a biasing of the base register and
6133 then offsetting that. Don't do this when optimizing for space
6134 since it can cause too many CSEs. */
6135 if (optimize_size && offset >= 0
6136 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6138 HOST_WIDE_INT delta;
6141 delta = offset - (256 - GET_MODE_SIZE (mode));
6142 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6143 delta = 31 * GET_MODE_SIZE (mode);
6145 delta = offset & (~31 * GET_MODE_SIZE (mode));
6147 xop0 = force_operand (plus_constant (xop0, offset - delta),
6149 x = plus_constant (xop0, delta);
6151 else if (offset < 0 && offset > -256)
6152 /* Small negative offsets are best done with a subtract before the
6153 dereference, forcing these into a register normally takes two
6154 instructions. */
6155 x = force_operand (x, NULL_RTX);
6158 /* For the remaining cases, force the constant into a register. */
6159 xop1 = force_reg (SImode, xop1);
6160 x = gen_rtx_PLUS (SImode, xop0, xop1);
6163 else if (GET_CODE (x) == PLUS
6164 && s_register_operand (XEXP (x, 1), SImode)
6165 && !s_register_operand (XEXP (x, 0), SImode))
6167 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6169 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
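/* Illustrative fold for SImode when optimizing for size: for
   offset == 300 we get delta = 300 - (256 - 4) = 48, so the base is
   biased by 252 and the access uses the in-range immediate
   [rbias, #48].  */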
6174 /* We need to find and carefully transform any SYMBOL and LABEL
6175 references; so go back to the original address expression. */
6176 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6178 if (new_x != orig_x)
6186 thumb_legitimize_reload_address (rtx *x_p,
6187 enum machine_mode mode,
6188 int opnum, int type,
6189 int ind_levels ATTRIBUTE_UNUSED)
6193 if (GET_CODE (x) == PLUS
6194 && GET_MODE_SIZE (mode) < 4
6195 && REG_P (XEXP (x, 0))
6196 && XEXP (x, 0) == stack_pointer_rtx
6197 && GET_CODE (XEXP (x, 1)) == CONST_INT
6198 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6203 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6204 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6208 /* If both registers are hi-regs, then it's better to reload the
6209 entire expression rather than each register individually. That
6210 only requires one reload register rather than two. */
6211 if (GET_CODE (x) == PLUS
6212 && REG_P (XEXP (x, 0))
6213 && REG_P (XEXP (x, 1))
6214 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6215 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6220 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6221 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6228 /* Test for various thread-local symbols. */
6230 /* Return TRUE if X is a thread-local symbol. */
6233 arm_tls_symbol_p (rtx x)
6235 if (! TARGET_HAVE_TLS)
6238 if (GET_CODE (x) != SYMBOL_REF)
6241 return SYMBOL_REF_TLS_MODEL (x) != 0;
6244 /* Helper for arm_tls_referenced_p. */
6247 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6249 if (GET_CODE (*x) == SYMBOL_REF)
6250 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6252 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6253 TLS offsets, not real symbol references. */
6254 if (GET_CODE (*x) == UNSPEC
6255 && XINT (*x, 1) == UNSPEC_TLS)
6261 /* Return TRUE if X contains any TLS symbol references. */
6264 arm_tls_referenced_p (rtx x)
6266 if (! TARGET_HAVE_TLS)
6269 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6272 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6275 arm_cannot_force_const_mem (rtx x)
6279 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6281 split_const (x, &base, &offset);
6282 if (GET_CODE (base) == SYMBOL_REF
6283 && !offset_within_block_p (base, INTVAL (offset)))
6286 return arm_tls_referenced_p (x);
6289 #define REG_OR_SUBREG_REG(X) \
6290 (GET_CODE (X) == REG \
6291 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6293 #define REG_OR_SUBREG_RTX(X) \
6294 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6297 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6299 enum machine_mode mode = GET_MODE (x);
6313 return COSTS_N_INSNS (1);
6316 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6319 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6326 return COSTS_N_INSNS (2) + cycles;
6328 return COSTS_N_INSNS (1) + 16;
6331 return (COSTS_N_INSNS (1)
6332 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6333 + GET_CODE (SET_DEST (x)) == MEM));
6338 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6340 if (thumb_shiftable_const (INTVAL (x)))
6341 return COSTS_N_INSNS (2);
6342 return COSTS_N_INSNS (3);
6344 else if ((outer == PLUS || outer == COMPARE)
6345 && INTVAL (x) < 256 && INTVAL (x) > -256)
6347 else if ((outer == IOR || outer == XOR || outer == AND)
6348 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6349 return COSTS_N_INSNS (1);
6350 else if (outer == AND)
6353 /* This duplicates the tests in the andsi3 expander. */
6354 for (i = 9; i <= 31; i++)
6355 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6356 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6357 return COSTS_N_INSNS (2);
6359 else if (outer == ASHIFT || outer == ASHIFTRT
6360 || outer == LSHIFTRT)
6362 return COSTS_N_INSNS (2);
6368 return COSTS_N_INSNS (3);
6386 /* XXX another guess. */
6387 /* Memory costs quite a lot for the first word, but subsequent words
6388 load at the equivalent of a single insn each. */
6389 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6390 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6395 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6401 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6402 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6408 return total + COSTS_N_INSNS (1);
6410 /* Assume a two-shift sequence. Increase the cost slightly so
6411 we prefer actual shifts over an extend operation. */
6412 return total + 1 + COSTS_N_INSNS (2);
6420 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6422 enum machine_mode mode = GET_MODE (x);
6423 enum rtx_code subcode;
6425 enum rtx_code code = GET_CODE (x);
6431 /* Memory costs quite a lot for the first word, but subsequent words
6432 load at the equivalent of a single insn each. */
6433 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6440 if (TARGET_HARD_FLOAT && mode == SFmode)
6441 *total = COSTS_N_INSNS (2);
6442 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6443 *total = COSTS_N_INSNS (4);
6445 *total = COSTS_N_INSNS (20);
6449 if (GET_CODE (XEXP (x, 1)) == REG)
6450 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6451 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6452 *total = rtx_cost (XEXP (x, 1), code, speed);
6458 *total += COSTS_N_INSNS (4);
6463 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6464 *total += rtx_cost (XEXP (x, 0), code, speed);
6467 *total += COSTS_N_INSNS (3);
6471 *total += COSTS_N_INSNS (1);
6472 /* Increase the cost of complex shifts because they aren't any faster,
6473 and reduce dual issue opportunities. */
6474 if (arm_tune_cortex_a9
6475 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6483 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6484 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6485 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6487 *total += rtx_cost (XEXP (x, 1), code, speed);
6491 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6492 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6494 *total += rtx_cost (XEXP (x, 0), code, speed);
6501 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6503 if (TARGET_HARD_FLOAT
6505 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6507 *total = COSTS_N_INSNS (1);
6508 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6509 && arm_const_double_rtx (XEXP (x, 0)))
6511 *total += rtx_cost (XEXP (x, 1), code, speed);
6515 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6516 && arm_const_double_rtx (XEXP (x, 1)))
6518 *total += rtx_cost (XEXP (x, 0), code, speed);
6524 *total = COSTS_N_INSNS (20);
6528 *total = COSTS_N_INSNS (1);
6529 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6530 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6532 *total += rtx_cost (XEXP (x, 1), code, speed);
6536 subcode = GET_CODE (XEXP (x, 1));
6537 if (subcode == ASHIFT || subcode == ASHIFTRT
6538 || subcode == LSHIFTRT
6539 || subcode == ROTATE || subcode == ROTATERT)
6541 *total += rtx_cost (XEXP (x, 0), code, speed);
6542 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6546 /* A shift as a part of RSB costs no more than RSB itself. */
6547 if (GET_CODE (XEXP (x, 0)) == MULT
6548 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6550 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6551 *total += rtx_cost (XEXP (x, 1), code, speed);
6556 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6558 *total += rtx_cost (XEXP (x, 0), code, speed);
6559 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6563 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6564 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6566 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6567 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6568 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6569 *total += COSTS_N_INSNS (1);
6577 if (code == PLUS && arm_arch6 && mode == SImode
6578 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6579 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6581 *total = COSTS_N_INSNS (1);
6582 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6584 *total += rtx_cost (XEXP (x, 1), code, speed);
6588 /* MLA: All arguments must be registers. We filter out
6589 multiplication by a power of two, so that we fall down into
6591 if (GET_CODE (XEXP (x, 0)) == MULT
6592 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6594 /* The cost comes from the cost of the multiply. */
6598 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6600 if (TARGET_HARD_FLOAT
6602 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6604 *total = COSTS_N_INSNS (1);
6605 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6606 && arm_const_double_rtx (XEXP (x, 1)))
6608 *total += rtx_cost (XEXP (x, 0), code, speed);
6615 *total = COSTS_N_INSNS (20);
6619 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6620 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6622 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6623 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6624 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6625 *total += COSTS_N_INSNS (1);
6631 case AND: case XOR: case IOR:
6633 /* Normally the frame registers will be split into reg+const during
6634 reload, so it is a bad idea to combine them with other instructions,
6635 since then they might not be moved outside of loops. As a compromise
6636 we allow integration with ops that have a constant as their second
6638 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6639 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6640 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6641 || (REG_OR_SUBREG_REG (XEXP (x, 1))
6642 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
6647 *total += COSTS_N_INSNS (2);
6648 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6649 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6651 *total += rtx_cost (XEXP (x, 0), code, speed);
6658 *total += COSTS_N_INSNS (1);
6659 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6660 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6662 *total += rtx_cost (XEXP (x, 0), code, speed);
6665 subcode = GET_CODE (XEXP (x, 0));
6666 if (subcode == ASHIFT || subcode == ASHIFTRT
6667 || subcode == LSHIFTRT
6668 || subcode == ROTATE || subcode == ROTATERT)
6670 *total += rtx_cost (XEXP (x, 1), code, speed);
6671 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6676 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6678 *total += rtx_cost (XEXP (x, 1), code, speed);
6679 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6683 if (subcode == UMIN || subcode == UMAX
6684 || subcode == SMIN || subcode == SMAX)
6686 *total = COSTS_N_INSNS (3);
6693 /* This should have been handled by the CPU specific routines. */
6697 if (arm_arch3m && mode == SImode
6698 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6699 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6700 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6701 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6702 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6703 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6705 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6708 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT. */
6712 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6714 if (TARGET_HARD_FLOAT
6716 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6718 *total = COSTS_N_INSNS (1);
6721 *total = COSTS_N_INSNS (2);
6727 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6728 if (mode == SImode && code == NOT)
6730 subcode = GET_CODE (XEXP (x, 0));
6731 if (subcode == ASHIFT || subcode == ASHIFTRT
6732 || subcode == LSHIFTRT
6733 || subcode == ROTATE || subcode == ROTATERT
6735 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6737 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6738 /* Register shifts cost an extra cycle. */
6739 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6740 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6749 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6751 *total = COSTS_N_INSNS (4);
6755 operand = XEXP (x, 0);
6757 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6758 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6759 && GET_CODE (XEXP (operand, 0)) == REG
6760 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6761 *total += COSTS_N_INSNS (1);
6762 *total += (rtx_cost (XEXP (x, 1), code, speed)
6763 + rtx_cost (XEXP (x, 2), code, speed));
6767 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6769 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6775 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6776 && mode == SImode && XEXP (x, 1) == const0_rtx)
6778 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6784 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6785 && mode == SImode && XEXP (x, 1) == const0_rtx)
6787 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6807 /* SCC insns. In the case where the comparison has already been
6808 performed, then they cost 2 instructions. Otherwise they need
6809 an additional comparison before them. */
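/* For example, (set (reg) (lt (reg) (reg))) is typically a
"mov rD, #0" plus a conditional "movlt rD, #1" once the comparison
result is in the condition flags. */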
6810 *total = COSTS_N_INSNS (2);
6811 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6818 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6824 *total += COSTS_N_INSNS (1);
6825 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6826 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6828 *total += rtx_cost (XEXP (x, 0), code, speed);
6832 subcode = GET_CODE (XEXP (x, 0));
6833 if (subcode == ASHIFT || subcode == ASHIFTRT
6834 || subcode == LSHIFTRT
6835 || subcode == ROTATE || subcode == ROTATERT)
6837 *total += rtx_cost (XEXP (x, 1), code, speed);
6838 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6843 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6845 *total += rtx_cost (XEXP (x, 1), code, speed);
6846 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6856 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6857 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6858 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6859 *total += rtx_cost (XEXP (x, 1), code, speed);
6863 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6865 if (TARGET_HARD_FLOAT
6867 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6869 *total = COSTS_N_INSNS (1);
6872 *total = COSTS_N_INSNS (20);
6875 *total = COSTS_N_INSNS (1);
6877 *total += COSTS_N_INSNS (3);
6883 if (GET_MODE_CLASS (mode) == MODE_INT)
6885 rtx op = XEXP (x, 0);
6886 enum machine_mode opmode = GET_MODE (op);
6889 *total += COSTS_N_INSNS (1);
6891 if (opmode != SImode)
6895 /* If !arm_arch4, we use one of the extendhisi2_mem
6896 or movhi_bytes patterns for HImode. For a QImode
6897 sign extension, we first zero-extend from memory
6898 and then perform a shift sequence. */
6899 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6900 *total += COSTS_N_INSNS (2);
6903 *total += COSTS_N_INSNS (1);
6905 /* We don't have the necessary insn, so we need to perform some
6907 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6908 /* An and with constant 255. */
6909 *total += COSTS_N_INSNS (1);
6911 /* A shift sequence. Increase costs slightly to avoid
6912 combining two shifts into an extend operation. */
6913 *total += COSTS_N_INSNS (2) + 1;
6919 switch (GET_MODE (XEXP (x, 0)))
6926 *total = COSTS_N_INSNS (1);
6936 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6940 if (const_ok_for_arm (INTVAL (x))
6941 || const_ok_for_arm (~INTVAL (x)))
6942 *total = COSTS_N_INSNS (1);
6944 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6945 INTVAL (x), NULL_RTX,
6952 *total = COSTS_N_INSNS (3);
6956 *total = COSTS_N_INSNS (1);
6960 *total = COSTS_N_INSNS (1);
6961 *total += rtx_cost (XEXP (x, 0), code, speed);
6965 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6966 && (mode == SFmode || !TARGET_VFP_SINGLE))
6967 *total = COSTS_N_INSNS (1);
6969 *total = COSTS_N_INSNS (4);
6973 *total = COSTS_N_INSNS (4);
6978 /* Estimates the size cost of thumb1 instructions.
6979 For now most of the code is copied from thumb1_rtx_costs. We need more
6980 fine-grained tuning when we have more related test cases. */
6982 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6984 enum machine_mode mode = GET_MODE (x);
6997 return COSTS_N_INSNS (1);
7000 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7002 /* The Thumb1 mul instruction can't operate on a constant; we must
7003 load it into a register first. */
7004 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7005 return COSTS_N_INSNS (1) + const_size;
7007 return COSTS_N_INSNS (1);
7010 return (COSTS_N_INSNS (1)
7011 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7012 + (GET_CODE (SET_DEST (x)) == MEM)));
7017 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7018 return COSTS_N_INSNS (1);
7019 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7020 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7021 return COSTS_N_INSNS (2);
7022 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7023 if (thumb_shiftable_const (INTVAL (x)))
7024 return COSTS_N_INSNS (2);
7025 return COSTS_N_INSNS (3);
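/* Rough examples of the buckets above: 200 is one insn (movs); -200 is
two (constraint J: movs then negate); 0x1f00 is two (constraint K:
movs then lsls); anything else is assumed to take three. */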
7027 else if ((outer == PLUS || outer == COMPARE)
7028 && INTVAL (x) < 256 && INTVAL (x) > -256)
7030 else if ((outer == IOR || outer == XOR || outer == AND)
7031 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7032 return COSTS_N_INSNS (1);
7033 else if (outer == AND)
7036 /* This duplicates the tests in the andsi3 expander. */
7037 for (i = 9; i <= 31; i++)
7038 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7039 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7040 return COSTS_N_INSNS (2);
7042 else if (outer == ASHIFT || outer == ASHIFTRT
7043 || outer == LSHIFTRT)
7045 return COSTS_N_INSNS (2);
7051 return COSTS_N_INSNS (3);
7069 /* XXX another guess. */
7070 /* Memory costs quite a lot for the first word, but subsequent words
7071 load at the equivalent of a single insn each. */
7072 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7073 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7078 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7083 /* XXX still guessing. */
7084 switch (GET_MODE (XEXP (x, 0)))
7087 return (1 + (mode == DImode ? 4 : 0)
7088 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7091 return (4 + (mode == DImode ? 4 : 0)
7092 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7095 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7106 /* RTX costs when optimizing for size. */
7108 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7111 enum machine_mode mode = GET_MODE (x);
7114 *total = thumb1_size_rtx_costs (x, code, outer_code);
7118 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7122 /* A memory access costs 1 insn if the mode is small or the address is
7123 a single register; otherwise it costs one insn per word. */
7124 if (REG_P (XEXP (x, 0)))
7125 *total = COSTS_N_INSNS (1);
7127 && GET_CODE (XEXP (x, 0)) == PLUS
7128 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7129 /* This will be split into two instructions.
7130 See arm.md:calculate_pic_address. */
7131 *total = COSTS_N_INSNS (2);
7133 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7140 /* Needs a libcall, so it costs about this. */
7141 *total = COSTS_N_INSNS (2);
7145 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7147 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7155 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7157 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7160 else if (mode == SImode)
7162 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7163 /* Slightly disparage register shifts, but not by much. */
7164 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7165 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7169 /* Needs a libcall. */
7170 *total = COSTS_N_INSNS (2);
7174 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7175 && (mode == SFmode || !TARGET_VFP_SINGLE))
7177 *total = COSTS_N_INSNS (1);
7183 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7184 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7186 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7187 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7188 || subcode1 == ROTATE || subcode1 == ROTATERT
7189 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7190 || subcode1 == ASHIFTRT)
7192 /* It's just the cost of the two operands. */
7197 *total = COSTS_N_INSNS (1);
7201 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7205 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7206 && (mode == SFmode || !TARGET_VFP_SINGLE))
7208 *total = COSTS_N_INSNS (1);
7212 /* A shift as a part of ADD costs nothing. */
7213 if (GET_CODE (XEXP (x, 0)) == MULT
7214 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7216 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7217 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7218 *total += rtx_cost (XEXP (x, 1), code, false);
7223 case AND: case XOR: case IOR:
7226 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7228 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7229 || subcode == LSHIFTRT || subcode == ASHIFTRT
7230 || (code == AND && subcode == NOT))
7232 /* It's just the cost of the two operands. */
7238 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7242 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7246 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7247 && (mode == SFmode || !TARGET_VFP_SINGLE))
7249 *total = COSTS_N_INSNS (1);
7255 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7264 if (cc_register (XEXP (x, 0), VOIDmode))
7267 *total = COSTS_N_INSNS (1);
7271 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7272 && (mode == SFmode || !TARGET_VFP_SINGLE))
7273 *total = COSTS_N_INSNS (1);
7275 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7280 return arm_rtx_costs_1 (x, outer_code, total, 0);
7283 if (const_ok_for_arm (INTVAL (x)))
7284 /* A multiplication by a constant requires another instruction
7285 to load the constant to a register. */
7286 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7288 else if (const_ok_for_arm (~INTVAL (x)))
7289 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7290 else if (const_ok_for_arm (-INTVAL (x)))
7292 if (outer_code == COMPARE || outer_code == PLUS
7293 || outer_code == MINUS)
7296 *total = COSTS_N_INSNS (1);
7299 *total = COSTS_N_INSNS (2);
7305 *total = COSTS_N_INSNS (2);
7309 *total = COSTS_N_INSNS (4);
7314 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7315 cost of these slightly. */
7316 *total = COSTS_N_INSNS (1) + 1;
7320 if (mode != VOIDmode)
7321 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7323 *total = COSTS_N_INSNS (4); /* Who knows? */
7328 /* RTX costs entry point: use the size costs when optimizing for size, otherwise the cost function for the current tuning. */
7330 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7334 return arm_size_rtx_costs (x, (enum rtx_code) code,
7335 (enum rtx_code) outer_code, total);
7337 return current_tune->rtx_costs (x, (enum rtx_code) code,
7338 (enum rtx_code) outer_code,
7342 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7343 supported on any "slowmul" cores, so it can be ignored. */
7346 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7347 int *total, bool speed)
7349 enum machine_mode mode = GET_MODE (x);
7353 *total = thumb1_rtx_costs (x, code, outer_code);
7360 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7363 *total = COSTS_N_INSNS (20);
7367 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7369 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7370 & (unsigned HOST_WIDE_INT) 0xffffffff);
7371 int cost, const_ok = const_ok_for_arm (i);
7372 int j, booth_unit_size;
7374 /* Tune as appropriate. */
7375 cost = const_ok ? 4 : 8;
7376 booth_unit_size = 2;
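/* The loop below walks the multiplier two bits (one Booth step) at a
time and stops as soon as the remaining bits are all zero: e.g. a
multiplier of 0xff finishes after four steps, while a full 32-bit
constant takes sixteen. */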
7377 for (j = 0; i && j < 32; j += booth_unit_size)
7379 i >>= booth_unit_size;
7383 *total = COSTS_N_INSNS (cost);
7384 *total += rtx_cost (XEXP (x, 0), code, speed);
7388 *total = COSTS_N_INSNS (20);
7392 return arm_rtx_costs_1 (x, outer_code, total, speed);
7397 /* RTX cost for cores with a fast multiply unit (M variants). */
7400 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7401 int *total, bool speed)
7403 enum machine_mode mode = GET_MODE (x);
7407 *total = thumb1_rtx_costs (x, code, outer_code);
7411 /* ??? Should Thumb-2 use different costs? */
7415 /* There is no point basing this on the tuning, since it is always the
7416 fast variant if it exists at all. */
7418 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7419 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7420 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7422 *total = COSTS_N_INSNS (2);
7429 *total = COSTS_N_INSNS (5);
7433 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7435 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7436 & (unsigned HOST_WIDE_INT) 0xffffffff);
7437 int cost, const_ok = const_ok_for_arm (i);
7438 int j, booth_unit_size;
7440 /* Tune as appropriate. */
7441 cost = const_ok ? 4 : 8;
7442 booth_unit_size = 8;
7443 for (j = 0; i && j < 32; j += booth_unit_size)
7445 i >>= booth_unit_size;
7449 *total = COSTS_N_INSNS (cost);
7455 *total = COSTS_N_INSNS (4);
7459 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7461 if (TARGET_HARD_FLOAT
7463 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7465 *total = COSTS_N_INSNS (1);
7470 /* Requires a libcall. */
7471 *total = COSTS_N_INSNS (20);
7475 return arm_rtx_costs_1 (x, outer_code, total, speed);
7480 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7481 so it can be ignored. */
7484 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7485 int *total, bool speed)
7487 enum machine_mode mode = GET_MODE (x);
7491 *total = thumb1_rtx_costs (x, code, outer_code);
7498 if (GET_CODE (XEXP (x, 0)) != MULT)
7499 return arm_rtx_costs_1 (x, outer_code, total, speed);
7501 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7502 will stall until the multiplication is complete. */
7503 *total = COSTS_N_INSNS (3);
7507 /* There is no point basing this on the tuning, since it is always the
7508 fast variant if it exists at all. */
7510 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7511 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7512 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7514 *total = COSTS_N_INSNS (2);
7521 *total = COSTS_N_INSNS (5);
7525 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7527 /* If operand 1 is a constant we can more accurately
7528 calculate the cost of the multiply. The multiplier can
7529 retire 15 bits on the first cycle and a further 12 on the
7530 second. We do, of course, have to load the constant into
7531 a register first. */
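/* Reading the masks below: multipliers that fit in 15 bits (after the
conditional inversion of negative values) incur only the general
overhead, those that fit in 27 bits cost one extra cycle, and wider
ones two: e.g. 0x4000 -> 1 cycle, 0x10000 -> 2, 0x40000000 -> 3. */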
7532 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7533 /* There's a general overhead of one cycle. */
7535 unsigned HOST_WIDE_INT masked_const;
7540 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7542 masked_const = i & 0xffff8000;
7543 if (masked_const != 0)
7546 masked_const = i & 0xf8000000;
7547 if (masked_const != 0)
7550 *total = COSTS_N_INSNS (cost);
7556 *total = COSTS_N_INSNS (3);
7560 /* Requires a libcall. */
7561 *total = COSTS_N_INSNS (20);
7565 return arm_rtx_costs_1 (x, outer_code, total, speed);
7570 /* RTX costs for 9e (and later) cores. */
7573 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7574 int *total, bool speed)
7576 enum machine_mode mode = GET_MODE (x);
7583 *total = COSTS_N_INSNS (3);
7587 *total = thumb1_rtx_costs (x, code, outer_code);
7595 /* There is no point basing this on the tuning, since it is always the
7596 fast variant if it exists at all. */
7598 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7599 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7600 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7602 *total = COSTS_N_INSNS (2);
7609 *total = COSTS_N_INSNS (5);
7615 *total = COSTS_N_INSNS (2);
7619 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7621 if (TARGET_HARD_FLOAT
7623 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7625 *total = COSTS_N_INSNS (1);
7630 *total = COSTS_N_INSNS (20);
7634 return arm_rtx_costs_1 (x, outer_code, total, speed);
7637 /* All address computations that can be done are free, but rtx cost returns
7638 the same for practically all of them. So we weight the different types
7639 of address here in order of preference (most preferred first):
7640 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7642 arm_arm_address_cost (rtx x)
7644 enum rtx_code c = GET_CODE (x);
7646 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7648 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7653 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7656 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7666 arm_thumb_address_cost (rtx x)
7668 enum rtx_code c = GET_CODE (x);
7673 && GET_CODE (XEXP (x, 0)) == REG
7674 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7681 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7683 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7687 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7691 /* Some true dependencies can have a higher cost depending
7692 on precisely how certain input operands are used. */
7694 && REG_NOTE_KIND (link) == 0
7695 && recog_memoized (insn) >= 0
7696 && recog_memoized (dep) >= 0)
7698 int shift_opnum = get_attr_shift (insn);
7699 enum attr_type attr_type = get_attr_type (dep);
7701 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7702 operand for INSN. If we have a shifted input operand and the
7703 instruction we depend on is another ALU instruction, then we may
7704 have to account for an additional stall. */
7705 if (shift_opnum != 0
7706 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7708 rtx shifted_operand;
7711 /* Get the shifted operand. */
7712 extract_insn (insn);
7713 shifted_operand = recog_data.operand[shift_opnum];
7715 /* Iterate over all the operands in DEP. If we write an operand
7716 that overlaps with SHIFTED_OPERAND, then we have to increase the
7717 cost of this dependency. */
7719 preprocess_constraints ();
7720 for (opno = 0; opno < recog_data.n_operands; opno++)
7722 /* We can ignore strict inputs. */
7723 if (recog_data.operand_type[opno] == OP_IN)
7726 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7733 /* XXX This is not strictly true for the FPA. */
7734 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7735 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7738 /* Call insns don't incur a stall, even if they follow a load. */
7739 if (REG_NOTE_KIND (link) == 0
7740 && GET_CODE (insn) == CALL_INSN)
7743 if ((i_pat = single_set (insn)) != NULL
7744 && GET_CODE (SET_SRC (i_pat)) == MEM
7745 && (d_pat = single_set (dep)) != NULL
7746 && GET_CODE (SET_DEST (d_pat)) == MEM)
7748 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7749 /* This is a load after a store; there is no conflict if the load reads
7750 from a cached area. Assume that loads from the stack, and from the
7751 constant pool are cached, and that others will miss. This is a
7754 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7755 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7756 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7757 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7764 static int fp_consts_inited = 0;
7766 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7767 static const char * const strings_fp[8] =
7768 {
7769 "0", "1", "2", "3",
7770 "4", "5", "0.5", "10"
7771 };
7773 static REAL_VALUE_TYPE values_fp[8];
7776 init_fp_table (void)
7782 fp_consts_inited = 1;
7784 fp_consts_inited = 8;
7786 for (i = 0; i < fp_consts_inited; i++)
7788 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7793 /* Return TRUE if rtx X is a valid immediate FP constant. */
7795 arm_const_double_rtx (rtx x)
7800 if (!fp_consts_inited)
7803 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7804 if (REAL_VALUE_MINUS_ZERO (r))
7807 for (i = 0; i < fp_consts_inited; i++)
7808 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7814 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7816 neg_const_double_rtx_ok_for_fpa (rtx x)
7821 if (!fp_consts_inited)
7824 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7825 r = real_value_negate (&r);
7826 if (REAL_VALUE_MINUS_ZERO (r))
7829 for (i = 0; i < 8; i++)
7830 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7837 /* VFPv3 has a fairly wide range of representable immediates, formed from
7838 "quarter-precision" floating-point values. These can be evaluated using this
7839 formula (with ^ for exponentiation):
7841 valid = (-1)^s * n * 2^-r
7843 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7844 16 <= n <= 31 and 0 <= r <= 7.
7846 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7848 - A (most-significant) is the sign bit.
7849 - BCD are the exponent (encoded as r XOR 3).
7850 - EFGH are the mantissa (encoded as n - 16).
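/* Worked example: +1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4, which
encodes as ABCDEFGH = 0 111 0000, i.e. the immediate 0x70. */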
7853 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7854 fconst[sd] instruction, or -1 if X isn't suitable. */
7856 vfp3_const_double_index (rtx x)
7858 REAL_VALUE_TYPE r, m;
7860 unsigned HOST_WIDE_INT mantissa, mant_hi;
7861 unsigned HOST_WIDE_INT mask;
7862 HOST_WIDE_INT m1, m2;
7863 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7865 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7868 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7870 /* We can't represent these things, so detect them first. */
7871 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7874 /* Extract sign, exponent and mantissa. */
7875 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7876 r = real_value_abs (&r);
7877 exponent = REAL_EXP (&r);
7878 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7879 highest (sign) bit, with a fixed binary point at bit point_pos.
7880 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7881 bits for the mantissa, this may fail (low bits would be lost). */
7882 real_ldexp (&m, &r, point_pos - exponent);
7883 REAL_VALUE_TO_INT (&m1, &m2, m);
7887 /* If there are bits set in the low part of the mantissa, we can't
7888 represent this value. */
7892 /* Now make it so that mantissa contains the most-significant bits, and move
7893 the point_pos to indicate that the least-significant bits have been
7895 point_pos -= HOST_BITS_PER_WIDE_INT;
7898 /* We can permit four significant bits of mantissa only, plus a high bit
7899 which is always 1. */
7900 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7901 if ((mantissa & mask) != 0)
7904 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7905 mantissa >>= point_pos - 5;
7907 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7908 floating-point immediate zero with Neon using an integer-zero load, but
7909 that case is handled elsewhere.) */
7913 gcc_assert (mantissa >= 16 && mantissa <= 31);
7915 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7916 normalized significands are in the range [1, 2). (Our mantissa is shifted
7917 left 4 places at this point relative to normalized IEEE754 values). GCC
7918 internally uses [0.5, 1) (see real.c), so the exponent returned from
7919 REAL_EXP must be altered. */
7920 exponent = 5 - exponent;
7922 if (exponent < 0 || exponent > 7)
7925 /* Sign, mantissa and exponent are now in the correct form to plug into the
7926 formula described in the comment above. */
7927 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
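/* A minimal standalone sketch of the encoding computed above; the
helper name is hypothetical and the block is illustrative only, so it
is not compiled in. */
#if 0
static int
vfp3_encode_imm (int sign, int n, int r)
{
  /* The value represented is (-1)^sign * n * 2^-r, with 16 <= n <= 31
     and 0 <= r <= 7; e.g. vfp3_encode_imm (0, 16, 4) == 0x70 (+1.0).  */
  return (sign << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif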
7930 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7932 vfp3_const_double_rtx (rtx x)
7937 return vfp3_const_double_index (x) != -1;
7940 /* Recognize immediates which can be used in various Neon instructions. Legal
7941 immediates are described by the following table (for VMVN variants, the
7942 bitwise inverse of the constant shown is recognized. In either case, VMOV
7943 is output and the correct instruction to use for a given constant is chosen
7944 by the assembler). The constant shown is replicated across all elements of
7945 the destination vector.
7947 insn elems variant constant (binary)
7948 ---- ----- ------- -----------------
7949 vmov i32 0 00000000 00000000 00000000 abcdefgh
7950 vmov i32 1 00000000 00000000 abcdefgh 00000000
7951 vmov i32 2 00000000 abcdefgh 00000000 00000000
7952 vmov i32 3 abcdefgh 00000000 00000000 00000000
7953 vmov i16 4 00000000 abcdefgh
7954 vmov i16 5 abcdefgh 00000000
7955 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7956 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7957 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7958 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7959 vmvn i16 10 00000000 abcdefgh
7960 vmvn i16 11 abcdefgh 00000000
7961 vmov i32 12 00000000 00000000 abcdefgh 11111111
7962 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7963 vmov i32 14 00000000 abcdefgh 11111111 11111111
7964 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7966 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7967 eeeeeeee ffffffff gggggggg hhhhhhhh
7968 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7970 For case 18, B = !b. Representable values are exactly those accepted by
7971 vfp3_const_double_index, but are output as floating-point numbers rather
7974 Variants 0-5 (inclusive) may also be used as immediates for the second
7975 operand of VORR/VBIC instructions.
7977 The INVERSE argument causes the bitwise inverse of the given operand to be
7978 recognized instead (used for recognizing legal immediates for the VAND/VORN
7979 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7980 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7981 output, rather than the real insns vbic/vorr).
7983 INVERSE makes no difference to the recognition of float vectors.
7985 The return value is the variant of immediate as shown in the above table, or
7986 -1 if the given value doesn't match any of the listed patterns.
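/* Worked example: a V4SI vector with every element equal to 0x4b splats
to the per-element bytes 4b 00 00 00, which matches variant 0 with an
element width of 32 and so can be emitted as "vmov.i32 qN, #0x4b". */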
7989 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7990 rtx *modconst, int *elementwidth)
7992 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7994 for (i = 0; i < idx; i += (STRIDE)) \
7999 immtype = (CLASS); \
8000 elsize = (ELSIZE); \
8004 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8005 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8006 unsigned char bytes[16];
8007 int immtype = -1, matches;
8008 unsigned int invmask = inverse ? 0xff : 0;
8010 /* Vectors of float constants. */
8011 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8013 rtx el0 = CONST_VECTOR_ELT (op, 0);
8016 if (!vfp3_const_double_rtx (el0))
8019 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8021 for (i = 1; i < n_elts; i++)
8023 rtx elt = CONST_VECTOR_ELT (op, i);
8026 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8028 if (!REAL_VALUES_EQUAL (r0, re))
8033 *modconst = CONST_VECTOR_ELT (op, 0);
8041 /* Splat vector constant out into a byte vector. */
8042 for (i = 0; i < n_elts; i++)
8044 rtx el = CONST_VECTOR_ELT (op, i);
8045 unsigned HOST_WIDE_INT elpart;
8046 unsigned int part, parts;
8048 if (GET_CODE (el) == CONST_INT)
8050 elpart = INTVAL (el);
8053 else if (GET_CODE (el) == CONST_DOUBLE)
8055 elpart = CONST_DOUBLE_LOW (el);
8061 for (part = 0; part < parts; part++)
8064 for (byte = 0; byte < innersize; byte++)
8066 bytes[idx++] = (elpart & 0xff) ^ invmask;
8067 elpart >>= BITS_PER_UNIT;
8069 if (GET_CODE (el) == CONST_DOUBLE)
8070 elpart = CONST_DOUBLE_HIGH (el);
8075 gcc_assert (idx == GET_MODE_SIZE (mode));
8079 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8080 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8082 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8083 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8085 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8086 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8088 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8089 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8091 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8093 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8095 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8096 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8098 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8099 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8101 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8102 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8104 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8105 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8107 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8109 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8111 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8112 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8114 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8115 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8117 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8118 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8120 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8121 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8123 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8125 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8126 && bytes[i] == bytes[(i + 8) % idx]);
8134 *elementwidth = elsize;
8138 unsigned HOST_WIDE_INT imm = 0;
8140 /* Un-invert bytes of recognized vector, if necessary. */
8142 for (i = 0; i < idx; i++)
8143 bytes[i] ^= invmask;
8147 /* FIXME: Broken on 32-bit H_W_I hosts. */
8148 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8150 for (i = 0; i < 8; i++)
8151 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8152 << (i * BITS_PER_UNIT);
8154 *modconst = GEN_INT (imm);
8158 unsigned HOST_WIDE_INT imm = 0;
8160 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8161 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8163 *modconst = GEN_INT (imm);
8171 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8172 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8173 float elements), and a modified constant (whatever should be output for a
8174 VMOV) in *MODCONST. */
8177 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8178 rtx *modconst, int *elementwidth)
8182 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8188 *modconst = tmpconst;
8191 *elementwidth = tmpwidth;
8196 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8197 the immediate is valid, write a constant suitable for using as an operand
8198 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8199 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8202 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8203 rtx *modconst, int *elementwidth)
8207 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8209 if (retval < 0 || retval > 5)
8213 *modconst = tmpconst;
8216 *elementwidth = tmpwidth;
8221 /* Return a string suitable for output of Neon immediate logic operation
8225 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8226 int inverse, int quad)
8228 int width, is_valid;
8229 static char templ[40];
8231 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8233 gcc_assert (is_valid != 0);
8236 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8238 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
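/* For instance, MNEM "vorr" with a 32-bit element width and QUAD set
yields the template "vorr.i32\t%q0, %2". */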
8243 /* Output a sequence of pairwise operations to implement a reduction.
8244 NOTE: We do "too much work" here, because pairwise operations work on two
8245 registers' worth of operands in one go. Unfortunately we don't think those
8246 extra calculations can be exploited to do the full operation in fewer steps.
8247 Although all vector elements of the result but the first are ignored, we
8248 actually calculate the same result in each of the elements. An alternative
8249 such as initially loading a vector with zero to use as each of the second
8250 operands would use up an additional register and take an extra instruction,
8251 for no particular gain. */
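/* For instance, reducing a four-element vector runs the loop below with
i == 2 and then i == 1: [a,b,c,d] -> [a+b,c+d,...] -> [a+b+c+d,...],
so the final register holds the full result in every element, of which
only element 0 is then used. */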
8254 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8255 rtx (*reduc) (rtx, rtx, rtx))
8257 enum machine_mode inner = GET_MODE_INNER (mode);
8258 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8261 for (i = parts / 2; i >= 1; i /= 2)
8263 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8264 emit_insn (reduc (dest, tmpsum, tmpsum));
8269 /* If VALS is a vector constant that can be loaded into a register
8270 using VDUP, generate instructions to do so and return an RTX to
8271 assign to the register. Otherwise return NULL_RTX. */
8274 neon_vdup_constant (rtx vals)
8276 enum machine_mode mode = GET_MODE (vals);
8277 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8278 int n_elts = GET_MODE_NUNITS (mode);
8279 bool all_same = true;
8283 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8286 for (i = 0; i < n_elts; ++i)
8288 x = XVECEXP (vals, 0, i);
8289 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8294 /* The elements are not all the same. We could handle repeating
8295 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8296 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8300 /* We can load this constant by using VDUP and a constant in a
8301 single ARM register. This will be cheaper than a vector
8304 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8305 return gen_rtx_VEC_DUPLICATE (mode, x);
8308 /* Generate code to load VALS, which is a PARALLEL containing only
8309 constants (for vec_init) or CONST_VECTOR, efficiently into a
8310 register. Returns an RTX to copy into the register, or NULL_RTX
8311 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8314 neon_make_constant (rtx vals)
8316 enum machine_mode mode = GET_MODE (vals);
8318 rtx const_vec = NULL_RTX;
8319 int n_elts = GET_MODE_NUNITS (mode);
8323 if (GET_CODE (vals) == CONST_VECTOR)
8325 else if (GET_CODE (vals) == PARALLEL)
8327 /* A CONST_VECTOR must contain only CONST_INTs and
8328 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8329 Only store valid constants in a CONST_VECTOR. */
8330 for (i = 0; i < n_elts; ++i)
8332 rtx x = XVECEXP (vals, 0, i);
8333 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8336 if (n_const == n_elts)
8337 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8342 if (const_vec != NULL
8343 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8344 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8346 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8347 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8348 pipeline cycle; creating the constant takes one or two ARM
8351 else if (const_vec != NULL_RTX)
8352 /* Load from constant pool. On Cortex-A8 this takes two cycles
8353 (for either double or quad vectors). We cannot take advantage
8354 of single-cycle VLD1 because we need a PC-relative addressing
8358 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8359 We cannot construct an initializer. */
8363 /* Initialize vector TARGET to VALS. */
8366 neon_expand_vector_init (rtx target, rtx vals)
8368 enum machine_mode mode = GET_MODE (target);
8369 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8370 int n_elts = GET_MODE_NUNITS (mode);
8371 int n_var = 0, one_var = -1;
8372 bool all_same = true;
8376 for (i = 0; i < n_elts; ++i)
8378 x = XVECEXP (vals, 0, i);
8379 if (!CONSTANT_P (x))
8380 ++n_var, one_var = i;
8382 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8388 rtx constant = neon_make_constant (vals);
8389 if (constant != NULL_RTX)
8391 emit_move_insn (target, constant);
8396 /* Splat a single non-constant element if we can. */
8397 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8399 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8400 emit_insn (gen_rtx_SET (VOIDmode, target,
8401 gen_rtx_VEC_DUPLICATE (mode, x)));
8405 /* One field is non-constant. Load constant then overwrite varying
8406 field. This is more efficient than using the stack. */
8409 rtx copy = copy_rtx (vals);
8410 rtx index = GEN_INT (one_var);
8412 /* Load constant part of vector, substitute neighboring value for
8414 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8415 neon_expand_vector_init (target, copy);
8417 /* Insert variable. */
8418 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8422 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8425 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8428 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8431 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8434 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8437 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8440 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8443 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8446 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8454 /* Construct the vector in memory one field at a time
8455 and load the whole vector. */
8456 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8457 for (i = 0; i < n_elts; i++)
8458 emit_move_insn (adjust_address_nv (mem, inner_mode,
8459 i * GET_MODE_SIZE (inner_mode)),
8460 XVECEXP (vals, 0, i));
8461 emit_move_insn (target, mem);
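/* Summarizing the strategy above: all-constant vectors are loaded with
VMOV/VDUP or from the pool; a uniform variable vector is splatted with
VDUP; something like {1, 2, 3, x} loads the constant {1, 2, 3, 1} and
then inserts x into lane 3; only fully mixed initializers fall back to
building the vector in memory. */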
8464 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8465 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8466 reported source locations are bogus. */
8469 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8474 gcc_assert (GET_CODE (operand) == CONST_INT);
8476 lane = INTVAL (operand);
8478 if (lane < low || lane >= high)
8482 /* Bounds-check lanes. */
8485 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8487 bounds_check (operand, low, high, "lane out of range");
8490 /* Bounds-check constants. */
8493 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8495 bounds_check (operand, low, high, "constant out of range");
8499 neon_element_bits (enum machine_mode mode)
8502 return GET_MODE_BITSIZE (mode);
8504 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8508 /* Predicates for `match_operand' and `match_operator'. */
8510 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8512 cirrus_memory_offset (rtx op)
8514 /* Reject eliminable registers. */
8515 if (! (reload_in_progress || reload_completed)
8516 && ( reg_mentioned_p (frame_pointer_rtx, op)
8517 || reg_mentioned_p (arg_pointer_rtx, op)
8518 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8519 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8520 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8521 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8524 if (GET_CODE (op) == MEM)
8530 /* Match: (mem (reg)). */
8531 if (GET_CODE (ind) == REG)
8537 if (GET_CODE (ind) == PLUS
8538 && GET_CODE (XEXP (ind, 0)) == REG
8539 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8540 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8547 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8548 WB is true if full writeback address modes are allowed and is false
8549 if limited writeback address modes (POST_INC and PRE_DEC) are
8553 arm_coproc_mem_operand (rtx op, bool wb)
8557 /* Reject eliminable registers. */
8558 if (! (reload_in_progress || reload_completed)
8559 && ( reg_mentioned_p (frame_pointer_rtx, op)
8560 || reg_mentioned_p (arg_pointer_rtx, op)
8561 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8562 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8563 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8564 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8567 /* Constants are converted into offsets from labels. */
8568 if (GET_CODE (op) != MEM)
8573 if (reload_completed
8574 && (GET_CODE (ind) == LABEL_REF
8575 || (GET_CODE (ind) == CONST
8576 && GET_CODE (XEXP (ind, 0)) == PLUS
8577 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8578 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8581 /* Match: (mem (reg)). */
8582 if (GET_CODE (ind) == REG)
8583 return arm_address_register_rtx_p (ind, 0);
8585 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8586 acceptable in any case (subject to verification by
8587 arm_address_register_rtx_p). We need WB to be true to accept
8588 PRE_INC and POST_DEC. */
8589 if (GET_CODE (ind) == POST_INC
8590 || GET_CODE (ind) == PRE_DEC
8592 && (GET_CODE (ind) == PRE_INC
8593 || GET_CODE (ind) == POST_DEC)))
8594 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8597 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8598 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8599 && GET_CODE (XEXP (ind, 1)) == PLUS
8600 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8601 ind = XEXP (ind, 1);
8606 if (GET_CODE (ind) == PLUS
8607 && GET_CODE (XEXP (ind, 0)) == REG
8608 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8609 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8610 && INTVAL (XEXP (ind, 1)) > -1024
8611 && INTVAL (XEXP (ind, 1)) < 1024
8612 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8618 /* Return TRUE if OP is a memory operand which we can load or store a vector
8619 to/from. TYPE is one of the following values:
8620 0 - Vector load/store (vldr)
8621 1 - Core registers (ldm)
8622 2 - Element/structure loads (vld1)
8625 neon_vector_mem_operand (rtx op, int type)
8629 /* Reject eliminable registers. */
8630 if (! (reload_in_progress || reload_completed)
8631 && ( reg_mentioned_p (frame_pointer_rtx, op)
8632 || reg_mentioned_p (arg_pointer_rtx, op)
8633 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8634 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8635 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8636 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8639 /* Constants are converted into offsets from labels. */
8640 if (GET_CODE (op) != MEM)
8645 if (reload_completed
8646 && (GET_CODE (ind) == LABEL_REF
8647 || (GET_CODE (ind) == CONST
8648 && GET_CODE (XEXP (ind, 0)) == PLUS
8649 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8650 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8653 /* Match: (mem (reg)). */
8654 if (GET_CODE (ind) == REG)
8655 return arm_address_register_rtx_p (ind, 0);
8657 /* Allow post-increment with Neon registers. */
8658 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8659 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8661 /* FIXME: vld1 allows register post-modify. */
8667 && GET_CODE (ind) == PLUS
8668 && GET_CODE (XEXP (ind, 0)) == REG
8669 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8670 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8671 && INTVAL (XEXP (ind, 1)) > -1024
8672 && INTVAL (XEXP (ind, 1)) < 1016
8673 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8679 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8682 neon_struct_mem_operand (rtx op)
8686 /* Reject eliminable registers. */
8687 if (! (reload_in_progress || reload_completed)
8688 && ( reg_mentioned_p (frame_pointer_rtx, op)
8689 || reg_mentioned_p (arg_pointer_rtx, op)
8690 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8691 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8692 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8693 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8696 /* Constants are converted into offsets from labels. */
8697 if (GET_CODE (op) != MEM)
8702 if (reload_completed
8703 && (GET_CODE (ind) == LABEL_REF
8704 || (GET_CODE (ind) == CONST
8705 && GET_CODE (XEXP (ind, 0)) == PLUS
8706 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8707 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8710 /* Match: (mem (reg)). */
8711 if (GET_CODE (ind) == REG)
8712 return arm_address_register_rtx_p (ind, 0);
8717 /* Return true if X is a register that will be eliminated later on. */
8719 arm_eliminable_register (rtx x)
8721 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8722 || REGNO (x) == ARG_POINTER_REGNUM
8723 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8724 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8727 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
8728 coprocessor registers. Otherwise return NO_REGS. */
8731 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8735 if (!TARGET_NEON_FP16)
8736 return GENERAL_REGS;
8737 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8739 return GENERAL_REGS;
8743 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8744 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8745 && neon_vector_mem_operand (x, 0))
8748 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8751 return GENERAL_REGS;
8754 /* Values which must be returned in the most-significant end of the return
8758 arm_return_in_msb (const_tree valtype)
8760 return (TARGET_AAPCS_BASED
8762 && (AGGREGATE_TYPE_P (valtype)
8763 || TREE_CODE (valtype) == COMPLEX_TYPE));
8766 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8767 Used by the Cirrus Maverick code which has to work around
8768 a hardware bug triggered by such instructions. */
8770 arm_memory_load_p (rtx insn)
8772 rtx body, lhs, rhs;
8774 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8777 body = PATTERN (insn);
8779 if (GET_CODE (body) != SET)
8782 lhs = XEXP (body, 0);
8783 rhs = XEXP (body, 1);
8785 lhs = REG_OR_SUBREG_RTX (lhs);
8787 /* If the destination is not a general purpose
8788 register we do not have to worry. */
8789 if (GET_CODE (lhs) != REG
8790 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8793 /* As well as loads from memory we also have to react
8794 to loads of invalid constants which will be turned
8795 into loads from the minipool. */
8796 return (GET_CODE (rhs) == MEM
8797 || GET_CODE (rhs) == SYMBOL_REF
8798 || note_invalid_constants (insn, -1, false));
8801 /* Return TRUE if INSN is a Cirrus instruction. */
8803 arm_cirrus_insn_p (rtx insn)
8805 enum attr_cirrus attr;
8807 /* get_attr cannot accept USE or CLOBBER. */
8809 || GET_CODE (insn) != INSN
8810 || GET_CODE (PATTERN (insn)) == USE
8811 || GET_CODE (PATTERN (insn)) == CLOBBER)
8814 attr = get_attr_cirrus (insn);
8816 return attr != CIRRUS_NOT;
8819 /* Cirrus reorg for invalid instruction combinations. */
8821 cirrus_reorg (rtx first)
8823 enum attr_cirrus attr;
8824 rtx body = PATTERN (first);
8828 /* Any branch must be followed by two non-Cirrus instructions. */
8829 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8832 t = next_nonnote_insn (first);
8834 if (arm_cirrus_insn_p (t))
8837 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8841 emit_insn_after (gen_nop (), first);
8846 /* (float (blah)) is in parallel with a clobber. */
8847 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8848 body = XVECEXP (body, 0, 0);
8850 if (GET_CODE (body) == SET)
8852 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8854 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8855 be followed by a non-Cirrus insn. */
8856 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8858 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8859 emit_insn_after (gen_nop (), first);
8863 else if (arm_memory_load_p (first))
8865 unsigned int arm_regno;
8867 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8868 ldr/cfmv64hr combination where the Rd field is the same
8869 in both instructions must be split with a non-Cirrus
8876 /* Get Arm register number for ldr insn. */
8877 if (GET_CODE (lhs) == REG)
8878 arm_regno = REGNO (lhs);
8881 gcc_assert (GET_CODE (rhs) == REG);
8882 arm_regno = REGNO (rhs);
8886 first = next_nonnote_insn (first);
8888 if (! arm_cirrus_insn_p (first))
8891 body = PATTERN (first);
8893 /* (float (blah)) is in parallel with a clobber. */
8894 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8895 body = XVECEXP (body, 0, 0);
8897 if (GET_CODE (body) == FLOAT)
8898 body = XEXP (body, 0);
8900 if (get_attr_cirrus (first) == CIRRUS_MOVE
8901 && GET_CODE (XEXP (body, 1)) == REG
8902 && arm_regno == REGNO (XEXP (body, 1)))
8903 emit_insn_after (gen_nop (), first);
8909 /* get_attr cannot accept USE or CLOBBER. */
8911 || GET_CODE (first) != INSN
8912 || GET_CODE (PATTERN (first)) == USE
8913 || GET_CODE (PATTERN (first)) == CLOBBER)
8916 attr = get_attr_cirrus (first);
8918 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8919 must be followed by a non-coprocessor instruction. */
8920 if (attr == CIRRUS_COMPARE)
8924 t = next_nonnote_insn (first);
8926 if (arm_cirrus_insn_p (t))
8929 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8933 emit_insn_after (gen_nop (), first);
8939 /* Return TRUE if X references a SYMBOL_REF. */
8941 symbol_mentioned_p (rtx x)
8946 if (GET_CODE (x) == SYMBOL_REF)
8949 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8950 are constant offsets, not symbols. */
8951 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8954 fmt = GET_RTX_FORMAT (GET_CODE (x));
8956 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8962 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8963 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8966 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8973 /* Return TRUE if X references a LABEL_REF. */
8975 label_mentioned_p (rtx x)
8980 if (GET_CODE (x) == LABEL_REF)
8983 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8984 instruction, but they are constant offsets, not symbols. */
8985 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8988 fmt = GET_RTX_FORMAT (GET_CODE (x));
8989 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8995 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8996 if (label_mentioned_p (XVECEXP (x, i, j)))
8999 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9007 tls_mentioned_p (rtx x)
9009 switch (GET_CODE (x))
9012 return tls_mentioned_p (XEXP (x, 0));
9015 if (XINT (x, 1) == UNSPEC_TLS)
9023 /* Must not copy any rtx that uses a pc-relative address. */
9026 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9028 if (GET_CODE (*x) == UNSPEC
9029 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9035 arm_cannot_copy_insn_p (rtx insn)
9037 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9043 enum rtx_code code = GET_CODE (x);
9060 /* Return 1 if memory locations are adjacent. */
9062 adjacent_mem_locations (rtx a, rtx b)
9064 /* We don't guarantee to preserve the order of these memory refs. */
9065 if (volatile_refs_p (a) || volatile_refs_p (b))
9068 if ((GET_CODE (XEXP (a, 0)) == REG
9069 || (GET_CODE (XEXP (a, 0)) == PLUS
9070 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9071 && (GET_CODE (XEXP (b, 0)) == REG
9072 || (GET_CODE (XEXP (b, 0)) == PLUS
9073 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9075 HOST_WIDE_INT val0 = 0, val1 = 0;
9079 if (GET_CODE (XEXP (a, 0)) == PLUS)
9081 reg0 = XEXP (XEXP (a, 0), 0);
9082 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9087 if (GET_CODE (XEXP (b, 0)) == PLUS)
9089 reg1 = XEXP (XEXP (b, 0), 0);
9090 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9095 /* Don't accept any offset that will require multiple
9096 instructions to handle, since this would cause the
9097 arith_adjacentmem pattern to output an overlong sequence. */
9098 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9101 /* Don't allow an eliminable register: register elimination can make
9102 the offset too large. */
9103 if (arm_eliminable_register (reg0))
9106 val_diff = val1 - val0;
9110 /* If the target has load delay slots, then there's no benefit
9111 to using an ldm instruction unless the offset is zero and
9112 we are optimizing for size. */
9113 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9114 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9115 && (val_diff == 4 || val_diff == -4));
9118 return ((REGNO (reg0) == REGNO (reg1))
9119 && (val_diff == 4 || val_diff == -4));
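/* Editorial note (worked example, not from the original source): with
   base register r4, the pair

       (mem (reg r4))                        ;; val0 == 0
       (mem (plus (reg r4) (const_int 4)))   ;; val1 == 4

   gives matching base registers and val_diff == 4, so the two
   locations are adjacent; [r4] and [r4, #8] would be rejected. */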
9125 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9126 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9127 instruction. ADD_OFFSET is nonzero if the base address register needs
9128 to be modified with an add instruction before we can use it. */
9131 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9132 int nops, HOST_WIDE_INT add_offset)
9134 /* For the ARM8, ARM9 & StrongARM, 2 ldr instructions are faster than an ldm
9135 if the offset isn't small enough. The reason 2 ldrs are faster
9136 is because these ARMs are able to do more than one cache access
9137 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9138 whilst the ARM8 has a double bandwidth cache. This means that
9139 these cores can do both an instruction fetch and a data fetch in
9140 a single cycle, so the trick of calculating the address into a
9141 scratch register (one of the result regs) and then doing a load
9142 multiple actually becomes slower (and no smaller in code size).
9143 That is the transformation
9145 ldr rd1, [rbase + offset]
9146 ldr rd2, [rbase + offset + 4]
9150 add rd1, rbase, offset
9151 ldmia rd1, {rd1, rd2}
9153 produces worse code -- '3 cycles + any stalls on rd2' instead of
9154 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9155 access per cycle, the first sequence could never complete in less
9156 than 6 cycles, whereas the ldm sequence would only take 5 and
9157 would make better use of sequential accesses if not hitting the cache.
9160 We cheat here and test 'arm_ld_sched' which we currently know to
9161 only be true for the ARM8, ARM9 and StrongARM. If this ever
9162 changes, then the test below needs to be reworked. */
9163 if (nops == 2 && arm_ld_sched && add_offset != 0)
9169 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9170 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9171 an array ORDER which describes the sequence to use when accessing the
9172 offsets that produces an ascending order. In this sequence, each
9173 offset must be larger by exactly 4 than the previous one. ORDER[0]
9174 must have been filled in with the lowest offset by the caller.
9175 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9176 we use to verify that ORDER produces an ascending order of registers.
9177 Return true if it was possible to construct such an order, false if not. */
9181 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9185 for (i = 1; i < nops; i++)
9189 order[i] = order[i - 1];
9190 for (j = 0; j < nops; j++)
9191 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9193 /* We must find exactly one offset that is higher than the
9194 previous one by 4. */
9195 if (order[i] != order[i - 1])
9199 if (order[i] == order[i - 1])
9201 /* The register numbers must be ascending. */
9202 if (unsorted_regs != NULL
9203 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
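/* Editorial note (worked example, not from the original source): with
   unsorted_offsets = {4, 12, 0, 8} the caller seeds order[0] = 2 (the
   offset 0); the loop then finds offsets 4, 8 and 12 in turn, giving
   order = {2, 0, 3, 1}.  A duplicate offset, a gap other than 4, or
   register numbers that do not ascend with the offsets makes the
   function return false. */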
9210 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9211 HOST_WIDE_INT *load_offset)
9213 int unsorted_regs[MAX_LDM_STM_OPS];
9214 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9215 int order[MAX_LDM_STM_OPS];
9219 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9220 easily extended if required. */
9221 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9223 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9225 /* Loop over the operands and check that the memory references are
9226 suitable (i.e. immediate offsets from the same base register). At
9227 the same time, extract the target register, and the memory offsets. */
9229 for (i = 0; i < nops; i++)
9234 /* Convert a subreg of a mem into the mem itself. */
9235 if (GET_CODE (operands[nops + i]) == SUBREG)
9236 operands[nops + i] = alter_subreg (operands + (nops + i));
9238 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9240 /* Don't reorder volatile memory references; it doesn't seem worth
9241 looking for the case where the order is ok anyway. */
9242 if (MEM_VOLATILE_P (operands[nops + i]))
9245 offset = const0_rtx;
9247 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9248 || (GET_CODE (reg) == SUBREG
9249 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9250 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9251 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9253 || (GET_CODE (reg) == SUBREG
9254 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9255 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9259 base_reg = REGNO (reg);
9262 if (base_reg != (int) REGNO (reg))
9263 /* Not addressed from the same base register. */
9266 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9267 ? REGNO (operands[i])
9268 : REGNO (SUBREG_REG (operands[i])));
9270 /* If it isn't an integer register, or if it overwrites the
9271 base register but isn't the last insn in the list, then
9272 we can't do this. */
9273 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9274 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9277 unsorted_offsets[i] = INTVAL (offset);
9278 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9282 /* Not a suitable memory address. */
9286 /* All the useful information has now been extracted from the
9287 operands into unsorted_regs and unsorted_offsets; additionally,
9288 order[0] has been set to the lowest offset in the list. Sort
9289 the offsets into order, verifying that they are adjacent, and
9290 check that the register numbers are ascending. */
9291 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9298 for (i = 0; i < nops; i++)
9299 regs[i] = unsorted_regs[order[i]];
9301 *load_offset = unsorted_offsets[order[0]];
9304 if (unsorted_offsets[order[0]] == 0)
9305 ldm_case = 1; /* ldmia */
9306 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9307 ldm_case = 2; /* ldmib */
9308 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9309 ldm_case = 3; /* ldmda */
9310 else if (unsorted_offsets[order[nops - 1]] == -4)
9311 ldm_case = 4; /* ldmdb */
9312 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9313 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9318 if (!multiple_operation_profitable_p (false, nops,
9320 ? unsorted_offsets[order[0]] : 0))
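/* Editorial note (illustration assumed, not from the original source):
   for three loads from base rb the classification above gives

       offsets {0, 4, 8}      -> case 1, ldmia rb, {...}
       offsets {4, 8, 12}     -> case 2, ldmib rb, {...}  (ARM only)
       offsets {-8, -4, 0}    -> case 3, ldmda rb, {...}  (ARM only)
       offsets {-12, -8, -4}  -> case 4, ldmdb rb, {...}

   and any other start offset encodable as an add/sub immediate falls
   into case 5, which first adjusts the base register. */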
9327 emit_ldm_seq (rtx *operands, int nops)
9329 int regs[MAX_LDM_STM_OPS];
9331 HOST_WIDE_INT offset;
9335 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9338 strcpy (buf, "ldm%(ia%)\t");
9342 strcpy (buf, "ldm%(ib%)\t");
9346 strcpy (buf, "ldm%(da%)\t");
9350 strcpy (buf, "ldm%(db%)\t");
9355 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9356 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9359 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9360 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9362 output_asm_insn (buf, operands);
9364 strcpy (buf, "ldm%(ia%)\t");
9371 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9372 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9374 for (i = 1; i < nops; i++)
9375 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9376 reg_names[regs[i]]);
9378 strcat (buf, "}\t%@ phole ldm");
9380 output_asm_insn (buf, operands);
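/* Editorial sketch (registers assumed, not from the original source):
   for case 1 with base r0 and destinations r1-r3 the buffer built
   above holds roughly

       ldmia   r0, {r1, r2, r3}   @ phole ldm

   where the %( and %) markers let the output machinery place the
   addressing suffix (and any condition code) correctly for the
   assembler syntax in force. */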
9385 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9386 HOST_WIDE_INT * load_offset)
9388 int unsorted_regs[MAX_LDM_STM_OPS];
9389 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9390 int order[MAX_LDM_STM_OPS];
9394 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9395 easily extended if required. */
9396 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9398 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9400 /* Loop over the operands and check that the memory references are
9401 suitable (i.e. immediate offsets from the same base register). At
9402 the same time, extract the target register, and the memory offsets. */
9404 for (i = 0; i < nops; i++)
9409 /* Convert a subreg of a mem into the mem itself. */
9410 if (GET_CODE (operands[nops + i]) == SUBREG)
9411 operands[nops + i] = alter_subreg (operands + (nops + i));
9413 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9415 /* Don't reorder volatile memory references; it doesn't seem worth
9416 looking for the case where the order is ok anyway. */
9417 if (MEM_VOLATILE_P (operands[nops + i]))
9420 offset = const0_rtx;
9422 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9423 || (GET_CODE (reg) == SUBREG
9424 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9425 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9426 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9428 || (GET_CODE (reg) == SUBREG
9429 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9430 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9433 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9434 ? REGNO (operands[i])
9435 : REGNO (SUBREG_REG (operands[i])));
9437 base_reg = REGNO (reg);
9438 else if (base_reg != (int) REGNO (reg))
9439 /* Not addressed from the same base register. */
9442 /* If it isn't an integer register, then we can't do this. */
9443 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9446 unsorted_offsets[i] = INTVAL (offset);
9447 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9451 /* Not a suitable memory address. */
9455 /* All the useful information has now been extracted from the
9456 operands into unsorted_regs and unsorted_offsets; additionally,
9457 order[0] has been set to the lowest offset in the list. Sort
9458 the offsets into order, verifying that they are adjacent, and
9459 check that the register numbers are ascending. */
9460 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9467 for (i = 0; i < nops; i++)
9468 regs[i] = unsorted_regs[order[i]];
9470 *load_offset = unsorted_offsets[order[0]];
9473 if (unsorted_offsets[order[0]] == 0)
9474 stm_case = 1; /* stmia */
9475 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9476 stm_case = 2; /* stmib */
9477 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9478 stm_case = 3; /* stmda */
9479 else if (unsorted_offsets[order[nops - 1]] == -4)
9480 stm_case = 4; /* stmdb */
9484 if (!multiple_operation_profitable_p (false, nops, 0))
9491 emit_stm_seq (rtx *operands, int nops)
9493 int regs[MAX_LDM_STM_OPS];
9495 HOST_WIDE_INT offset;
9499 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9502 strcpy (buf, "stm%(ia%)\t");
9506 strcpy (buf, "stm%(ib%)\t");
9510 strcpy (buf, "stm%(da%)\t");
9514 strcpy (buf, "stm%(db%)\t");
9521 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9522 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9524 for (i = 1; i < nops; i++)
9525 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9526 reg_names[regs[i]]);
9528 strcat (buf, "}\t%@ phole stm");
9530 output_asm_insn (buf, operands);
9534 /* Routines for use in generating RTL. */
9537 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9538 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9540 HOST_WIDE_INT offset = *offsetp;
9543 int sign = up ? 1 : -1;
9546 /* XScale has load-store double instructions, but they have stricter
9547 alignment requirements than load-store multiple, so we cannot
9550 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9551 the pipeline until completion.
9559 An ldr instruction takes 1-3 cycles, but does not block the pipeline.
9568 Best case ldr will always win. However, the more ldr instructions
9569 we issue, the less likely we are to be able to schedule them well.
9570 Using ldr instructions also increases code size.
9572 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9573 for counts of 3 or 4 regs. */
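/* Editorial sketch (registers assumed, not from the original source):
   for count == 2 on XScale the early exit below therefore emits

       ldr     r0, [r4]
       ldr     r1, [r4, #4]

   rather than "ldmia r4, {r0, r1}", trading a little code size for
   scheduling freedom, unless we are optimizing for size. */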
9574 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9580 for (i = 0; i < count; i++)
9582 addr = plus_constant (from, i * 4 * sign);
9583 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9584 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9590 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9600 result = gen_rtx_PARALLEL (VOIDmode,
9601 rtvec_alloc (count + (write_back ? 1 : 0)));
9604 XVECEXP (result, 0, 0)
9605 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9610 for (j = 0; i < count; i++, j++)
9612 addr = plus_constant (from, j * 4 * sign);
9613 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9614 XVECEXP (result, 0, i)
9615 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9626 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9627 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9629 HOST_WIDE_INT offset = *offsetp;
9632 int sign = up ? 1 : -1;
9635 /* See arm_gen_load_multiple for discussion of
9636 the pros/cons of ldm/stm usage for XScale. */
9637 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9643 for (i = 0; i < count; i++)
9645 addr = plus_constant (to, i * 4 * sign);
9646 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9647 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9653 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9663 result = gen_rtx_PARALLEL (VOIDmode,
9664 rtvec_alloc (count + (write_back ? 1 : 0)));
9667 XVECEXP (result, 0, 0)
9668 = gen_rtx_SET (VOIDmode, to,
9669 plus_constant (to, count * 4 * sign));
9674 for (j = 0; i < count; i++, j++)
9676 addr = plus_constant (to, j * 4 * sign);
9677 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9678 XVECEXP (result, 0, i)
9679 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9690 arm_gen_movmemqi (rtx *operands)
9692 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9693 HOST_WIDE_INT srcoffset, dstoffset;
9695 rtx src, dst, srcbase, dstbase;
9696 rtx part_bytes_reg = NULL;
9699 if (GET_CODE (operands[2]) != CONST_INT
9700 || GET_CODE (operands[3]) != CONST_INT
9701 || INTVAL (operands[2]) > 64
9702 || INTVAL (operands[3]) & 3)
9705 dstbase = operands[0];
9706 srcbase = operands[1];
9708 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9709 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9711 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9712 out_words_to_go = INTVAL (operands[2]) / 4;
9713 last_bytes = INTVAL (operands[2]) & 3;
9714 dstoffset = srcoffset = 0;
9716 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9717 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9719 for (i = 0; in_words_to_go >= 2; i+=4)
9721 if (in_words_to_go > 4)
9722 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9723 srcbase, &srcoffset));
9725 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9726 FALSE, srcbase, &srcoffset));
9728 if (out_words_to_go)
9730 if (out_words_to_go > 4)
9731 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9732 dstbase, &dstoffset));
9733 else if (out_words_to_go != 1)
9734 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9738 dstbase, &dstoffset));
9741 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9742 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9743 if (last_bytes != 0)
9745 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9751 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9752 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9755 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9756 if (out_words_to_go)
9760 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9761 sreg = copy_to_reg (mem);
9763 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9764 emit_move_insn (mem, sreg);
9767 gcc_assert (!in_words_to_go); /* Sanity check */
9772 gcc_assert (in_words_to_go > 0);
9774 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9775 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9778 gcc_assert (!last_bytes || part_bytes_reg);
9780 if (BYTES_BIG_ENDIAN && last_bytes)
9782 rtx tmp = gen_reg_rtx (SImode);
9784 /* The bytes we want are in the top end of the word. */
9785 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9786 GEN_INT (8 * (4 - last_bytes))));
9787 part_bytes_reg = tmp;
9791 mem = adjust_automodify_address (dstbase, QImode,
9792 plus_constant (dst, last_bytes - 1),
9793 dstoffset + last_bytes - 1);
9794 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9798 tmp = gen_reg_rtx (SImode);
9799 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9800 part_bytes_reg = tmp;
9809 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9810 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9814 rtx tmp = gen_reg_rtx (SImode);
9815 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9816 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9817 part_bytes_reg = tmp;
9824 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9825 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
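/* Editorial note (worked example, sizes assumed, not from the original
   source): copying 10 bytes from a word-aligned source gives
   in_words_to_go == 3, out_words_to_go == 2 and last_bytes == 2; three
   words are fetched with one load-multiple, two are written back with
   a store-multiple, and the trailing halfword is stored from the third
   register with a single HImode move (shifted down first on big-endian
   targets). */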
9832 /* Select a dominance comparison mode if possible for a test of the general
9833 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9834 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9835 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9836 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9837 In all cases OP will be either EQ or NE, but we don't need to know which
9838 here. If we are unable to support a dominance comparison we return
9839 CC mode. This will then fail to match for the RTL expressions that
9840 generate this call. */
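/* Editorial note (example assumed, not from the original source): for
   (ne (ior (eq r0 r1) (eq r2 r3)) (const_int 0)) both component
   conditions are EQ, so DOM_CC_X_OR_Y yields CC_DEQmode and the test
   can be emitted as a compare followed by a conditional compare,
   roughly "cmp r0, r1; cmpne r2, r3", with the branch taken on eq. */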
9842 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9844 enum rtx_code cond1, cond2;
9847 /* Currently we will probably get the wrong result if the individual
9848 comparisons are not simple. This also ensures that it is safe to
9849 reverse a comparison if necessary. */
9850 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9852 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9856 /* The if_then_else variant of this tests the second condition if the
9857 first passes, but is true if the first fails. Reverse the first
9858 condition to get a true "inclusive-or" expression. */
9859 if (cond_or == DOM_CC_NX_OR_Y)
9860 cond1 = reverse_condition (cond1);
9862 /* If the comparisons are not equal, and one doesn't dominate the other,
9863 then we can't do this. */
9865 && !comparison_dominates_p (cond1, cond2)
9866 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9871 enum rtx_code temp = cond1;
9879 if (cond_or == DOM_CC_X_AND_Y)
9884 case EQ: return CC_DEQmode;
9885 case LE: return CC_DLEmode;
9886 case LEU: return CC_DLEUmode;
9887 case GE: return CC_DGEmode;
9888 case GEU: return CC_DGEUmode;
9889 default: gcc_unreachable ();
9893 if (cond_or == DOM_CC_X_AND_Y)
9909 if (cond_or == DOM_CC_X_AND_Y)
9925 if (cond_or == DOM_CC_X_AND_Y)
9941 if (cond_or == DOM_CC_X_AND_Y)
9956 /* The remaining cases only occur when both comparisons are the same. */
9959 gcc_assert (cond1 == cond2);
9963 gcc_assert (cond1 == cond2);
9967 gcc_assert (cond1 == cond2);
9971 gcc_assert (cond1 == cond2);
9975 gcc_assert (cond1 == cond2);
9984 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9986 /* All floating point compares return CCFP if it is an equality
9987 comparison, and CCFPE otherwise. */
9988 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10008 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10013 gcc_unreachable ();
10017 /* A compare with a shifted operand. Because of canonicalization, the
10018 comparison will have to be swapped when we emit the assembler. */
10019 if (GET_MODE (y) == SImode
10020 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10021 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10022 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10023 || GET_CODE (x) == ROTATERT))
10026 /* This operation is performed swapped, but since we only rely on the Z
10027 flag we don't need an additional mode. */
10028 if (GET_MODE (y) == SImode
10029 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10030 && GET_CODE (x) == NEG
10031 && (op == EQ || op == NE))
10034 /* This is a special case that is used by combine to allow a
10035 comparison of a shifted byte load to be split into a zero-extend
10036 followed by a comparison of the shifted integer (only valid for
10037 equalities and unsigned inequalities). */
10038 if (GET_MODE (x) == SImode
10039 && GET_CODE (x) == ASHIFT
10040 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10041 && GET_CODE (XEXP (x, 0)) == SUBREG
10042 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10043 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10044 && (op == EQ || op == NE
10045 || op == GEU || op == GTU || op == LTU || op == LEU)
10046 && GET_CODE (y) == CONST_INT)
10049 /* A construct for a conditional compare, if the false arm contains
10050 0, then both conditions must be true, otherwise either condition
10051 must be true. Not all conditions are possible, so CCmode is
10052 returned if it can't be done. */
10053 if (GET_CODE (x) == IF_THEN_ELSE
10054 && (XEXP (x, 2) == const0_rtx
10055 || XEXP (x, 2) == const1_rtx)
10056 && COMPARISON_P (XEXP (x, 0))
10057 && COMPARISON_P (XEXP (x, 1)))
10058 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10059 INTVAL (XEXP (x, 2)));
10061 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10062 if (GET_CODE (x) == AND
10063 && COMPARISON_P (XEXP (x, 0))
10064 && COMPARISON_P (XEXP (x, 1)))
10065 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10068 if (GET_CODE (x) == IOR
10069 && COMPARISON_P (XEXP (x, 0))
10070 && COMPARISON_P (XEXP (x, 1)))
10071 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10074 /* An operation (on Thumb) where we want to test for a single bit.
10075 This is done by shifting that bit up into the top bit of a
10076 scratch register; we can then branch on the sign bit. */
10078 && GET_MODE (x) == SImode
10079 && (op == EQ || op == NE)
10080 && GET_CODE (x) == ZERO_EXTRACT
10081 && XEXP (x, 1) == const1_rtx)
10084 /* For an operation that sets the condition codes as a side-effect, the
10085 V flag is not set correctly, so we can only use comparisons where
10086 this doesn't matter. (For LT and GE we can use "mi" and "pl" instead.) */
10088 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10089 if (GET_MODE (x) == SImode
10091 && (op == EQ || op == NE || op == LT || op == GE)
10092 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10093 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10094 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10095 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10096 || GET_CODE (x) == LSHIFTRT
10097 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10098 || GET_CODE (x) == ROTATERT
10099 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10100 return CC_NOOVmode;
10102 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10105 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10106 && GET_CODE (x) == PLUS
10107 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10110 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10112 /* To keep things simple, always use the Cirrus cfcmp64 if it is available. */
10114 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10121 /* A DImode comparison against zero can be implemented by
10122 or'ing the two halves together. */
10123 if (y == const0_rtx)
10126 /* We can do an equality test in three Thumb instructions. */
10136 /* DImode unsigned comparisons can be implemented by cmp +
10137 cmpeq without a scratch register. Not worth doing in Thumb-2. */
10148 /* DImode signed and unsigned comparisons can be implemented
10149 by cmp + sbcs with a scratch register, but that does not
10150 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10151 gcc_assert (op != EQ && op != NE);
10155 gcc_unreachable ();
10162 /* X and Y are two things to compare using CODE. Emit the compare insn and
10163 return the rtx for register 0 in the proper mode. */
10166 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10168 enum machine_mode mode;
10170 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10172 /* We might have X as a constant, Y as a register because of the predicates
10173 used for cmpdi. If so, force X to a register here. */
10174 if (dimode_comparison && !REG_P (x))
10175 x = force_reg (DImode, x);
10177 mode = SELECT_CC_MODE (code, x, y);
10178 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10180 if (dimode_comparison
10181 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10182 && mode != CC_CZmode)
10186 /* To compare two non-zero values for equality, XOR them and
10187 then compare against zero. Not used for ARM mode; there
10188 CC_CZmode is cheaper. */
10189 if (mode == CC_Zmode && y != const0_rtx)
10191 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10194 /* A scratch register is required. */
10195 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10196 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10197 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10200 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
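/* Editorial note (sketch assumed, not from the original source): in
   the CC_Zmode path above, a Thumb-2 DImode equality test of x and y
   becomes "compare (x ^ y) against zero": the XOR is expanded as two
   32-bit operations, and the clobbered scratch register gives the
   comparison pattern somewhere to OR the two halves together before
   the flags are set. */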
10205 /* Generate a sequence of insns that will generate the correct return
10206 address mask depending on the physical architecture that the program is running on. */
10209 arm_gen_return_addr_mask (void)
10211 rtx reg = gen_reg_rtx (Pmode);
10213 emit_insn (gen_return_addr_mask (reg));
10218 arm_reload_in_hi (rtx *operands)
10220 rtx ref = operands[1];
10222 HOST_WIDE_INT offset = 0;
10224 if (GET_CODE (ref) == SUBREG)
10226 offset = SUBREG_BYTE (ref);
10227 ref = SUBREG_REG (ref);
10230 if (GET_CODE (ref) == REG)
10232 /* We have a pseudo which has been spilt onto the stack; there
10233 are two cases here: the first where there is a simple
10234 stack-slot replacement and a second where the stack-slot is
10235 out of range, or is used as a subreg. */
10236 if (reg_equiv_mem[REGNO (ref)])
10238 ref = reg_equiv_mem[REGNO (ref)];
10239 base = find_replacement (&XEXP (ref, 0));
10242 /* The slot is out of range, or was dressed up in a SUBREG. */
10243 base = reg_equiv_address[REGNO (ref)];
10246 base = find_replacement (&XEXP (ref, 0));
10248 /* Handle the case where the address is too complex to be offset by 1. */
10249 if (GET_CODE (base) == MINUS
10250 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10252 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10254 emit_set_insn (base_plus, base);
10257 else if (GET_CODE (base) == PLUS)
10259 /* The addend must be CONST_INT, or we would have dealt with it above. */
10260 HOST_WIDE_INT hi, lo;
10262 offset += INTVAL (XEXP (base, 1));
10263 base = XEXP (base, 0);
10265 /* Rework the address into a legal sequence of insns. */
10266 /* Valid range for lo is -4095 -> 4095 */
10269 : -((-offset) & 0xfff));
10271 /* Corner case, if lo is the max offset then we would be out of range
10272 once we have added the additional 1 below, so bump the msb into the
10273 pre-loading insn(s). */
10277 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10278 ^ (HOST_WIDE_INT) 0x80000000)
10279 - (HOST_WIDE_INT) 0x80000000);
10281 gcc_assert (hi + lo == offset);
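/* Editorial note (worked example, offset assumed, not from the
   original source): for offset == 0x2345 the split gives lo == 0x345
   and hi == 0x2000; hi is folded into the base register by the addsi3
   below, leaving the two byte loads at lo and lo + 1, both inside the
   +/-4095 range of ldrb. */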
10285 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10287 /* Get the base address; addsi3 knows how to handle constants
10288 that require more than one insn. */
10289 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10295 /* Operands[2] may overlap operands[0] (though it won't overlap
10296 operands[1]), that's why we asked for a DImode reg -- so we can
10297 use the bit that does not overlap. */
10298 if (REGNO (operands[2]) == REGNO (operands[0]))
10299 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10301 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10303 emit_insn (gen_zero_extendqisi2 (scratch,
10304 gen_rtx_MEM (QImode,
10305 plus_constant (base,
10307 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10308 gen_rtx_MEM (QImode,
10309 plus_constant (base,
10311 if (!BYTES_BIG_ENDIAN)
10312 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10313 gen_rtx_IOR (SImode,
10316 gen_rtx_SUBREG (SImode, operands[0], 0),
10320 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10321 gen_rtx_IOR (SImode,
10322 gen_rtx_ASHIFT (SImode, scratch,
10324 gen_rtx_SUBREG (SImode, operands[0], 0)));
10327 /* Handle storing a half-word to memory during reload by synthesizing as two
10328 byte stores. Take care not to clobber the input values until after we
10329 have moved them somewhere safe. This code assumes that if the DImode
10330 scratch in operands[2] overlaps either the input value or output address
10331 in some way, then that value must die in this insn (we absolutely need
10332 two scratch registers for some corner cases). */
10334 arm_reload_out_hi (rtx *operands)
10336 rtx ref = operands[0];
10337 rtx outval = operands[1];
10339 HOST_WIDE_INT offset = 0;
10341 if (GET_CODE (ref) == SUBREG)
10343 offset = SUBREG_BYTE (ref);
10344 ref = SUBREG_REG (ref);
10347 if (GET_CODE (ref) == REG)
10349 /* We have a pseudo which has been spilt onto the stack; there
10350 are two cases here: the first where there is a simple
10351 stack-slot replacement and a second where the stack-slot is
10352 out of range, or is used as a subreg. */
10353 if (reg_equiv_mem[REGNO (ref)])
10355 ref = reg_equiv_mem[REGNO (ref)];
10356 base = find_replacement (&XEXP (ref, 0));
10359 /* The slot is out of range, or was dressed up in a SUBREG. */
10360 base = reg_equiv_address[REGNO (ref)];
10363 base = find_replacement (&XEXP (ref, 0));
10365 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10367 /* Handle the case where the address is too complex to be offset by 1. */
10368 if (GET_CODE (base) == MINUS
10369 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10371 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10373 /* Be careful not to destroy OUTVAL. */
10374 if (reg_overlap_mentioned_p (base_plus, outval))
10376 /* Updating base_plus might destroy outval, see if we can
10377 swap the scratch and base_plus. */
10378 if (!reg_overlap_mentioned_p (scratch, outval))
10381 scratch = base_plus;
10386 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10388 /* Be conservative and copy OUTVAL into the scratch now;
10389 this should only be necessary if outval is a subreg
10390 of something larger than a word. */
10391 /* XXX Might this clobber base? I can't see how it can,
10392 since scratch is known to overlap with OUTVAL, and
10393 must be wider than a word. */
10394 emit_insn (gen_movhi (scratch_hi, outval));
10395 outval = scratch_hi;
10399 emit_set_insn (base_plus, base);
10402 else if (GET_CODE (base) == PLUS)
10404 /* The addend must be CONST_INT, or we would have dealt with it above. */
10405 HOST_WIDE_INT hi, lo;
10407 offset += INTVAL (XEXP (base, 1));
10408 base = XEXP (base, 0);
10410 /* Rework the address into a legal sequence of insns. */
10411 /* Valid range for lo is -4095 -> 4095 */
10414 : -((-offset) & 0xfff));
10416 /* Corner case, if lo is the max offset then we would be out of range
10417 once we have added the additional 1 below, so bump the msb into the
10418 pre-loading insn(s). */
10422 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10423 ^ (HOST_WIDE_INT) 0x80000000)
10424 - (HOST_WIDE_INT) 0x80000000);
10426 gcc_assert (hi + lo == offset);
10430 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10432 /* Be careful not to destroy OUTVAL. */
10433 if (reg_overlap_mentioned_p (base_plus, outval))
10435 /* Updating base_plus might destroy outval, see if we
10436 can swap the scratch and base_plus. */
10437 if (!reg_overlap_mentioned_p (scratch, outval))
10440 scratch = base_plus;
10445 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10447 /* Be conservative and copy outval into scratch now;
10448 this should only be necessary if outval is a
10449 subreg of something larger than a word. */
10450 /* XXX Might this clobber base? I can't see how it
10451 can, since scratch is known to overlap with OUTVAL. */
10453 emit_insn (gen_movhi (scratch_hi, outval));
10454 outval = scratch_hi;
10458 /* Get the base address; addsi3 knows how to handle constants
10459 that require more than one insn. */
10460 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10466 if (BYTES_BIG_ENDIAN)
10468 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10469 plus_constant (base, offset + 1)),
10470 gen_lowpart (QImode, outval)));
10471 emit_insn (gen_lshrsi3 (scratch,
10472 gen_rtx_SUBREG (SImode, outval, 0),
10474 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10475 gen_lowpart (QImode, scratch)));
10479 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10480 gen_lowpart (QImode, outval)));
10481 emit_insn (gen_lshrsi3 (scratch,
10482 gen_rtx_SUBREG (SImode, outval, 0),
10484 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10485 plus_constant (base, offset + 1)),
10486 gen_lowpart (QImode, scratch)));
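/* Editorial sketch (registers assumed, not from the original source):
   on a little-endian target the sequence just emitted is roughly

       strb    r1, [rb, #off]        @ low byte of OUTVAL
       mov     r2, r1, lsr #8
       strb    r2, [rb, #off + 1]    @ high byte via the scratch

   with the stores issued in the opposite order (high byte at the
   lower address) when BYTES_BIG_ENDIAN. */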
10490 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10491 (padded to the size of a word) should be passed in a register. */
10494 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10496 if (TARGET_AAPCS_BASED)
10497 return must_pass_in_stack_var_size (mode, type);
10499 return must_pass_in_stack_var_size_or_pad (mode, type);
10503 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10504 Return true if an argument passed on the stack should be padded upwards,
10505 i.e. if the least-significant byte has useful data.
10506 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10507 aggregate types are placed in the lowest memory address. */
10510 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10512 if (!TARGET_AAPCS_BASED)
10513 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10515 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10522 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10523 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10524 byte of the register has useful data, and return the opposite if the
10525 most significant byte does.
10526 For AAPCS, small aggregates and small complex types are always padded upwards. */
10530 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10531 tree type, int first ATTRIBUTE_UNUSED)
10533 if (TARGET_AAPCS_BASED
10534 && BYTES_BIG_ENDIAN
10535 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10536 && int_size_in_bytes (type) <= 4)
10539 /* Otherwise, use default padding. */
10540 return !BYTES_BIG_ENDIAN;
10544 /* Print a symbolic form of X to the debug file, F. */
10546 arm_print_value (FILE *f, rtx x)
10548 switch (GET_CODE (x))
10551 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10555 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10563 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10565 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10566 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10574 fprintf (f, "\"%s\"", XSTR (x, 0));
10578 fprintf (f, "`%s'", XSTR (x, 0));
10582 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10586 arm_print_value (f, XEXP (x, 0));
10590 arm_print_value (f, XEXP (x, 0));
10592 arm_print_value (f, XEXP (x, 1));
10600 fprintf (f, "????");
10605 /* Routines for manipulation of the constant pool. */
10607 /* Arm instructions cannot load a large constant directly into a
10608 register; they have to come from a pc-relative load. The constant
10609 must therefore be placed in the addressable range of the
10610 pc-relative load. Depending on the precise pc-relative load
10611 instruction the range is somewhere between 256 bytes and 4k. This
10612 means that we often have to dump a constant inside a function, and
10613 generate code to branch around it.
10615 It is important to minimize this, since the branches will slow
10616 things down and make the code larger.
10618 Normally we can hide the table after an existing unconditional
10619 branch so that there is no interruption of the flow, but in the
10620 worst case the code looks like this:
10638 We fix this by performing a scan after scheduling, which notices
10639 which instructions need to have their operands fetched from the
10640 constant table and builds the table.
10642 The algorithm starts by building a table of all the constants that
10643 need fixing up and all the natural barriers in the function (places
10644 where a constant table can be dropped without breaking the flow).
10645 For each fixup we note how far the pc-relative replacement will be
10646 able to reach and the offset of the instruction into the function.
10648 Having built the table we then group the fixes together to form
10649 tables that are as large as possible (subject to addressing
10650 constraints) and emit each table of constants after the last
10651 barrier that is within range of all the instructions in the group.
10652 If a group does not contain a barrier, then we forcibly create one
10653 by inserting a jump instruction into the flow. Once the table has
10654 been inserted, the insns are then modified to reference the
10655 relevant entry in the pool.
10657 Possible enhancements to the algorithm (not implemented) are:
10659 1) For some processors and object formats, there may be benefit in
10660 aligning the pools to the start of cache lines; this alignment
10661 would need to be taken into account when calculating addressability of each entry. */
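/* Editorial sketch (labels and value assumed, not from the original
   source): a dumped pool typically looks like

       ldr     r0, .LCP0        @ pc-relative fixup
       b       .Lskip           @ forced barrier, if no natural one
   .LCP0:
       .word   0x12345678       @ minipool entry
   .Lskip:

   in the final assembly output. */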
10664 /* These typedefs are located at the start of this file, so that
10665 they can be used in the prototypes there. This comment is to
10666 remind readers of that fact so that the following structures
10667 can be understood more easily.
10669 typedef struct minipool_node Mnode;
10670 typedef struct minipool_fixup Mfix; */
10672 struct minipool_node
10674 /* Doubly linked chain of entries. */
10677 /* The maximum offset into the code that this entry can be placed. While
10678 pushing fixes for forward references, all entries are sorted in order
10679 of increasing max_address. */
10680 HOST_WIDE_INT max_address;
10681 /* Similarly for an entry inserted for a backwards ref. */
10682 HOST_WIDE_INT min_address;
10683 /* The number of fixes referencing this entry. This can become zero
10684 if we "unpush" an entry. In this case we ignore the entry when we
10685 come to emit the code. */
10687 /* The offset from the start of the minipool. */
10688 HOST_WIDE_INT offset;
10689 /* The value in the table. */
10691 /* The mode of the value. */
10692 enum machine_mode mode;
10693 /* The size of the value. With iWMMXt enabled
10694 sizes > 4 also imply an alignment of 8 bytes. */
10698 struct minipool_fixup
10702 HOST_WIDE_INT address;
10704 enum machine_mode mode;
10708 HOST_WIDE_INT forwards;
10709 HOST_WIDE_INT backwards;
10712 /* Fixes less than a word need padding out to a word boundary. */
10713 #define MINIPOOL_FIX_SIZE(mode) \
10714 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
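/* Editorial note (worked instances, not from the original source):
   MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode) both
   evaluate to 4 (padded out to a word), while
   MINIPOOL_FIX_SIZE (DImode) evaluates to 8. */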
10716 static Mnode * minipool_vector_head;
10717 static Mnode * minipool_vector_tail;
10718 static rtx minipool_vector_label;
10719 static int minipool_pad;
10721 /* The linked list of all minipool fixes required for this function. */
10722 Mfix * minipool_fix_head;
10723 Mfix * minipool_fix_tail;
10724 /* The fix entry for the current minipool, once it has been placed. */
10725 Mfix * minipool_barrier;
10727 /* Determines if INSN is the start of a jump table. Returns the end
10728 of the TABLE or NULL_RTX. */
10730 is_jump_table (rtx insn)
10734 if (GET_CODE (insn) == JUMP_INSN
10735 && JUMP_LABEL (insn) != NULL
10736 && ((table = next_real_insn (JUMP_LABEL (insn)))
10737 == next_real_insn (insn))
10739 && GET_CODE (table) == JUMP_INSN
10740 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10741 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10747 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10748 #define JUMP_TABLES_IN_TEXT_SECTION 0
10751 static HOST_WIDE_INT
10752 get_jump_table_size (rtx insn)
10754 /* ADDR_VECs only take room if read-only data goes into the text section. */
10756 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10758 rtx body = PATTERN (insn);
10759 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10760 HOST_WIDE_INT size;
10761 HOST_WIDE_INT modesize;
10763 modesize = GET_MODE_SIZE (GET_MODE (body));
10764 size = modesize * XVECLEN (body, elt);
10768 /* Round up size of TBB table to a halfword boundary. */
10769 size = (size + 1) & ~(HOST_WIDE_INT)1;
10772 /* No padding necessary for TBH. */
10775 /* Add two bytes for alignment on Thumb. */
10780 gcc_unreachable ();
10788 /* Move a minipool fix MP from its current location to before MAX_MP.
10789 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10790 constraints may need updating. */
10792 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10793 HOST_WIDE_INT max_address)
10795 /* The code below assumes these are different. */
10796 gcc_assert (mp != max_mp);
10798 if (max_mp == NULL)
10800 if (max_address < mp->max_address)
10801 mp->max_address = max_address;
10805 if (max_address > max_mp->max_address - mp->fix_size)
10806 mp->max_address = max_mp->max_address - mp->fix_size;
10808 mp->max_address = max_address;
10810 /* Unlink MP from its current position. Since max_mp is non-null,
10811 mp->prev must be non-null. */
10812 mp->prev->next = mp->next;
10813 if (mp->next != NULL)
10814 mp->next->prev = mp->prev;
10816 minipool_vector_tail = mp->prev;
10818 /* Re-insert it before MAX_MP. */
10820 mp->prev = max_mp->prev;
10823 if (mp->prev != NULL)
10824 mp->prev->next = mp;
10826 minipool_vector_head = mp;
10829 /* Save the new entry. */
10832 /* Scan over the preceding entries and adjust their addresses as required. */
10834 while (mp->prev != NULL
10835 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10837 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10844 /* Add a constant to the minipool for a forward reference. Returns the
10845 node added or NULL if the constant will not fit in this pool. */
10847 add_minipool_forward_ref (Mfix *fix)
10849 /* If set, max_mp is the first pool_entry that has a lower
10850 constraint than the one we are trying to add. */
10851 Mnode * max_mp = NULL;
10852 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10855 /* If the minipool starts before the end of FIX->INSN then this FIX
10856 cannot be placed into the current pool. Furthermore, adding the
10857 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
10859 if (minipool_vector_head &&
10860 (fix->address + get_attr_length (fix->insn)
10861 >= minipool_vector_head->max_address - fix->fix_size))
10864 /* Scan the pool to see if a constant with the same value has
10865 already been added. While we are doing this, also note the
10866 location where we must insert the constant if it doesn't already exist. */
10868 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10870 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10871 && fix->mode == mp->mode
10872 && (GET_CODE (fix->value) != CODE_LABEL
10873 || (CODE_LABEL_NUMBER (fix->value)
10874 == CODE_LABEL_NUMBER (mp->value)))
10875 && rtx_equal_p (fix->value, mp->value))
10877 /* More than one fix references this entry. */
10879 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10882 /* Note the insertion point if necessary. */
10884 && mp->max_address > max_address)
10887 /* If we are inserting an 8-byte aligned quantity and
10888 we have not already found an insertion point, then
10889 make sure that all such 8-byte aligned quantities are
10890 placed at the start of the pool. */
10891 if (ARM_DOUBLEWORD_ALIGN
10893 && fix->fix_size >= 8
10894 && mp->fix_size < 8)
10897 max_address = mp->max_address;
10901 /* The value is not currently in the minipool, so we need to create
10902 a new entry for it. If MAX_MP is NULL, the entry will be put on
10903 the end of the list since the placement is less constrained than
10904 any existing entry. Otherwise, we insert the new fix before
10905 MAX_MP and, if necessary, adjust the constraints on the other entries. */
10908 mp->fix_size = fix->fix_size;
10909 mp->mode = fix->mode;
10910 mp->value = fix->value;
10912 /* Not yet required for a backwards ref. */
10913 mp->min_address = -65536;
10915 if (max_mp == NULL)
10917 mp->max_address = max_address;
10919 mp->prev = minipool_vector_tail;
10921 if (mp->prev == NULL)
10923 minipool_vector_head = mp;
10924 minipool_vector_label = gen_label_rtx ();
10927 mp->prev->next = mp;
10929 minipool_vector_tail = mp;
10933 if (max_address > max_mp->max_address - mp->fix_size)
10934 mp->max_address = max_mp->max_address - mp->fix_size;
10936 mp->max_address = max_address;
10939 mp->prev = max_mp->prev;
10941 if (mp->prev != NULL)
10942 mp->prev->next = mp;
10944 minipool_vector_head = mp;
10947 /* Save the new entry. */
10950 /* Scan over the preceding entries and adjust their addresses as required. */
10952 while (mp->prev != NULL
10953 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10955 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10963 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10964 HOST_WIDE_INT min_address)
10966 HOST_WIDE_INT offset;
10968 /* The code below assumes these are different. */
10969 gcc_assert (mp != min_mp);
10971 if (min_mp == NULL)
10973 if (min_address > mp->min_address)
10974 mp->min_address = min_address;
10978 /* We will adjust this below if it is too loose. */
10979 mp->min_address = min_address;
10981 /* Unlink MP from its current position. Since min_mp is non-null,
10982 mp->next must be non-null. */
10983 mp->next->prev = mp->prev;
10984 if (mp->prev != NULL)
10985 mp->prev->next = mp->next;
10987 minipool_vector_head = mp->next;
10989 /* Reinsert it after MIN_MP. */
10991 mp->next = min_mp->next;
10993 if (mp->next != NULL)
10994 mp->next->prev = mp;
10996 minipool_vector_tail = mp;
11002 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11004 mp->offset = offset;
11005 if (mp->refcount > 0)
11006 offset += mp->fix_size;
11008 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11009 mp->next->min_address = mp->min_address + mp->fix_size;
11015 /* Add a constant to the minipool for a backward reference. Returns the
11016 node added or NULL if the constant will not fit in this pool.
11018 Note that the code for insertion for a backwards reference can be
11019 somewhat confusing because the calculated offsets for each fix do
11020 not take into account the size of the pool (which is still under construction). */
11023 add_minipool_backward_ref (Mfix *fix)
11025 /* If set, min_mp is the last pool_entry that has a lower constraint
11026 than the one we are trying to add. */
11027 Mnode *min_mp = NULL;
11028 /* This can be negative, since it is only a constraint. */
11029 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11032 /* If we can't reach the current pool from this insn, or if we can't
11033 insert this entry at the end of the pool without pushing other
11034 fixes out of range, then we don't try. This ensures that we
11035 can't fail later on. */
11036 if (min_address >= minipool_barrier->address
11037 || (minipool_vector_tail->min_address + fix->fix_size
11038 >= minipool_barrier->address))
11041 /* Scan the pool to see if a constant with the same value has
11042 already been added. While we are doing this, also note the
11043 location where we must insert the constant if it doesn't already exist. */
11045 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11047 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11048 && fix->mode == mp->mode
11049 && (GET_CODE (fix->value) != CODE_LABEL
11050 || (CODE_LABEL_NUMBER (fix->value)
11051 == CODE_LABEL_NUMBER (mp->value)))
11052 && rtx_equal_p (fix->value, mp->value)
11053 /* Check that there is enough slack to move this entry to the
11054 end of the table (this is conservative). */
11055 && (mp->max_address
11056 > (minipool_barrier->address
11057 + minipool_vector_tail->offset
11058 + minipool_vector_tail->fix_size)))
11061 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11064 if (min_mp != NULL)
11065 mp->min_address += fix->fix_size;
11068 /* Note the insertion point if necessary. */
11069 if (mp->min_address < min_address)
11071 /* For now, we do not allow nodes that require 8-byte alignment
11072 to be inserted anywhere but at the start of the pool. */
11073 if (ARM_DOUBLEWORD_ALIGN
11074 && fix->fix_size >= 8 && mp->fix_size < 8)
11079 else if (mp->max_address
11080 < minipool_barrier->address + mp->offset + fix->fix_size)
11082 /* Inserting before this entry would push the fix beyond
11083 its maximum address (which can happen if we have
11084 re-located a forwards fix); force the new fix to come after it. */
11086 if (ARM_DOUBLEWORD_ALIGN
11087 && fix->fix_size >= 8 && mp->fix_size < 8)
11092 min_address = mp->min_address + fix->fix_size;
11095 /* Do not insert a non-8-byte aligned quantity before 8-byte
11096 aligned quantities. */
11097 else if (ARM_DOUBLEWORD_ALIGN
11098 && fix->fix_size < 8
11099 && mp->fix_size >= 8)
11102 min_address = mp->min_address + fix->fix_size;
11107 /* We need to create a new entry. */
11109 mp->fix_size = fix->fix_size;
11110 mp->mode = fix->mode;
11111 mp->value = fix->value;
11113 mp->max_address = minipool_barrier->address + 65536;
11115 mp->min_address = min_address;
11117 if (min_mp == NULL)
11120 mp->next = minipool_vector_head;
11122 if (mp->next == NULL)
11124 minipool_vector_tail = mp;
11125 minipool_vector_label = gen_label_rtx ();
11128 mp->next->prev = mp;
11130 minipool_vector_head = mp;
11134 mp->next = min_mp->next;
11138 if (mp->next != NULL)
11139 mp->next->prev = mp;
11141 minipool_vector_tail = mp;
11144 /* Save the new entry. */
11152 /* Scan over the following entries and adjust their offsets. */
11153 while (mp->next != NULL)
11155 if (mp->next->min_address < mp->min_address + mp->fix_size)
11156 mp->next->min_address = mp->min_address + mp->fix_size;
11159 mp->next->offset = mp->offset + mp->fix_size;
11161 mp->next->offset = mp->offset;
11170 assign_minipool_offsets (Mfix *barrier)
11172 HOST_WIDE_INT offset = 0;
11175 minipool_barrier = barrier;
11177 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11179 mp->offset = offset;
11181 if (mp->refcount > 0)
11182 offset += mp->fix_size;
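/* Editorial note (worked example, entries assumed, not from the
   original source): for live entries of size 4, 8 and 4 the loop above
   assigns offsets 0, 4 and 12; an entry whose refcount has dropped to
   zero still receives the running offset but contributes no size. */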
11186 /* Output the literal table. */
11188 dump_minipool (rtx scan)
11194 if (ARM_DOUBLEWORD_ALIGN)
11195 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11196 if (mp->refcount > 0 && mp->fix_size >= 8)
11203 fprintf (dump_file,
11204 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11205 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11207 scan = emit_label_after (gen_label_rtx (), scan);
11208 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11209 scan = emit_label_after (minipool_vector_label, scan);
11211 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11213 if (mp->refcount > 0)
11217 fprintf (dump_file,
11218 ";; Offset %u, min %ld, max %ld ",
11219 (unsigned) mp->offset, (unsigned long) mp->min_address,
11220 (unsigned long) mp->max_address);
11221 arm_print_value (dump_file, mp->value);
11222 fputc ('\n', dump_file);
11225 switch (mp->fix_size)
11227 #ifdef HAVE_consttable_1
11229 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11233 #ifdef HAVE_consttable_2
11235 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11239 #ifdef HAVE_consttable_4
11241 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11245 #ifdef HAVE_consttable_8
11247 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11251 #ifdef HAVE_consttable_16
11253 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11258 gcc_unreachable ();
11266 minipool_vector_head = minipool_vector_tail = NULL;
11267 scan = emit_insn_after (gen_consttable_end (), scan);
11268 scan = emit_barrier_after (scan);
11271 /* Return the cost of forcibly inserting a barrier after INSN. */
11273 arm_barrier_cost (rtx insn)
11275 /* Basing the location of the pool on the loop depth is preferable,
11276 but at the moment, the basic block information seems to be
11277 corrupt by this stage of the compilation. */
11278 int base_cost = 50;
11279 rtx next = next_nonnote_insn (insn);
11281 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11284 switch (GET_CODE (insn))
11287 /* It will always be better to place the table before the label, rather than after it. */
11296 return base_cost - 10;
11299 return base_cost + 10;
11303 /* Find the best place in the insn stream in the range
11304 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11305 Create the barrier by inserting a jump and add a new fix entry for
11308 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11310 HOST_WIDE_INT count = 0;
11312 rtx from = fix->insn;
11313 /* The instruction after which we will insert the jump. */
11314 rtx selected = NULL;
11316 /* The address at which the jump instruction will be placed. */
11317 HOST_WIDE_INT selected_address;
11319 HOST_WIDE_INT max_count = max_address - fix->address;
11320 rtx label = gen_label_rtx ();
11322 selected_cost = arm_barrier_cost (from);
11323 selected_address = fix->address;
11325 while (from && count < max_count)
11330 /* This code shouldn't have been called if there was a natural barrier within range. */
11332 gcc_assert (GET_CODE (from) != BARRIER);
11334 /* Count the length of this insn. */
11335 count += get_attr_length (from);
11337 /* If there is a jump table, add its length. */
11338 tmp = is_jump_table (from);
11341 count += get_jump_table_size (tmp);
11343 /* Jump tables aren't in a basic block, so base the cost on
11344 the dispatch insn. If we select this location, we will
11345 still put the pool after the table. */
11346 new_cost = arm_barrier_cost (from);
11348 if (count < max_count
11349 && (!selected || new_cost <= selected_cost))
11352 selected_cost = new_cost;
11353 selected_address = fix->address + count;
11356 /* Continue after the dispatch table. */
11357 from = NEXT_INSN (tmp);
11361 new_cost = arm_barrier_cost (from);
11363 if (count < max_count
11364 && (!selected || new_cost <= selected_cost))
11367 selected_cost = new_cost;
11368 selected_address = fix->address + count;
11371 from = NEXT_INSN (from);
11374 /* Make sure that we found a place to insert the jump. */
11375 gcc_assert (selected);
11377 /* Create a new JUMP_INSN that branches around a barrier. */
11378 from = emit_jump_insn_after (gen_jump (label), selected);
11379 JUMP_LABEL (from) = label;
11380 barrier = emit_barrier_after (from);
11381 emit_label_after (label, barrier);
11383 /* Create a minipool barrier entry for the new barrier. */
11384 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11385 new_fix->insn = barrier;
11386 new_fix->address = selected_address;
11387 new_fix->next = fix->next;
11388 fix->next = new_fix;
11393 /* Record that there is a natural barrier in the insn stream at
11396 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11398 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11401 fix->address = address;
11404 if (minipool_fix_head != NULL)
11405 minipool_fix_tail->next = fix;
11407 minipool_fix_head = fix;
11409 minipool_fix_tail = fix;
11412 /* Record INSN, which will need fixing up to load a value from the
11413 minipool. ADDRESS is the offset of the insn from the start of the
11414 function; LOC is a pointer to the part of the insn which requires
11415 fixing; VALUE is the constant that must be loaded, which is of type MODE. */
11418 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11419 enum machine_mode mode, rtx value)
11421 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11424 fix->address = address;
11427 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11428 fix->value = value;
11429 fix->forwards = get_attr_pool_range (insn);
11430 fix->backwards = get_attr_neg_pool_range (insn);
11431 fix->minipool = NULL;
11433 /* If an insn doesn't have a range defined for it, then it isn't
11434 expecting to be reworked by this code. Better to stop now than
11435 to generate duff assembly code. */
11436 gcc_assert (fix->forwards || fix->backwards);
11438 /* If an entry requires 8-byte alignment then assume all constant pools
11439 require 4 bytes of padding. Trying to do this later on a per-pool
11440 basis is awkward because existing pool entries have to be modified. */
11441 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11446 fprintf (dump_file,
11447 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11448 GET_MODE_NAME (mode),
11449 INSN_UID (insn), (unsigned long) address,
11450 -1 * (long)fix->backwards, (long)fix->forwards);
11451 arm_print_value (dump_file, fix->value);
11452 fprintf (dump_file, "\n");
11455 /* Add it to the chain of fixes. */
11458 if (minipool_fix_head != NULL)
11459 minipool_fix_tail->next = fix;
11461 minipool_fix_head = fix;
11463 minipool_fix_tail = fix;
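/* A sketch of the dump line produced above, with made-up numbers; the
   real range comes from the pool_range/neg_pool_range attributes in
   the machine description:

   ;; SImode fixup for i42; addr 100, range (-4084,4092): ...

   where the trailing "..." is the constant as printed by
   arm_print_value: insn 42 at offset 100 needs its constant placed in
   a pool no more than 4092 bytes ahead of, or 4084 bytes behind, the
   insn. */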
11466 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11467 Returns the number of insns needed, or 99 if we don't know how to
11470 arm_const_double_inline_cost (rtx val)
11472 rtx lowpart, highpart;
11473 enum machine_mode mode;
11475 mode = GET_MODE (val);
11477 if (mode == VOIDmode)
11480 gcc_assert (GET_MODE_SIZE (mode) == 8);
11482 lowpart = gen_lowpart (SImode, val);
11483 highpart = gen_highpart_mode (SImode, mode, val);
11485 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11486 gcc_assert (GET_CODE (highpart) == CONST_INT);
11488 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11489 NULL_RTX, NULL_RTX, 0, 0)
11490 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11491 NULL_RTX, NULL_RTX, 0, 0));
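/* A worked example for the cost computed above: for the DImode
   constant 0x0000000100000001 each 32-bit half is 1, a valid ARM
   immediate, so arm_gen_constant charges one insn per half and the
   total cost is 2. */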
11494 /* Return true if it is worthwhile to split a 64-bit constant into two
11495 32-bit operations. This is the case if optimizing for size, or
11496 if we have load delay slots, or if one 32-bit part can be done with
11497 a single data operation. */
11499 arm_const_double_by_parts (rtx val)
11501 enum machine_mode mode = GET_MODE (val);
11504 if (optimize_size || arm_ld_sched)
11507 if (mode == VOIDmode)
11510 part = gen_highpart_mode (SImode, mode, val);
11512 gcc_assert (GET_CODE (part) == CONST_INT);
11514 if (const_ok_for_arm (INTVAL (part))
11515 || const_ok_for_arm (~INTVAL (part)))
11518 part = gen_lowpart (SImode, val);
11520 gcc_assert (GET_CODE (part) == CONST_INT);
11522 if (const_ok_for_arm (INTVAL (part))
11523 || const_ok_for_arm (~INTVAL (part)))
11529 /* Return true if it is possible to inline both the high and low parts
11530 of a 64-bit constant into 32-bit data processing instructions. */
11532 arm_const_double_by_immediates (rtx val)
11534 enum machine_mode mode = GET_MODE (val);
11537 if (mode == VOIDmode)
11540 part = gen_highpart_mode (SImode, mode, val);
11542 gcc_assert (GET_CODE (part) == CONST_INT);
11544 if (!const_ok_for_arm (INTVAL (part)))
11547 part = gen_lowpart (SImode, val);
11549 gcc_assert (GET_CODE (part) == CONST_INT);
11551 if (!const_ok_for_arm (INTVAL (part)))
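/* For illustration: with VAL = 0x000000FF000003FC the high part 0xFF
   and the low part 0x3FC (0xFF rotated left by two) are both valid ARM
   immediates, so neither check above rejects the constant and it can
   be built with one data-processing insn per half. */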
11557 /* Scan INSN and note any of its operands that need fixing.
11558 If DO_PUSHES is false we do not actually push any of the fixups
11559 needed. The function returns TRUE if any fixups were needed/pushed.
11560 This is used by arm_memory_load_p() which needs to know about loads
11561 of constants that will be converted into minipool loads. */
11563 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11565 bool result = false;
11568 extract_insn (insn);
11570 if (!constrain_operands (1))
11571 fatal_insn_not_found (insn);
11573 if (recog_data.n_alternatives == 0)
11576 /* Fill in recog_op_alt with information about the constraints of
11578 preprocess_constraints ();
11580 for (opno = 0; opno < recog_data.n_operands; opno++)
11582 /* Things we need to fix can only occur in inputs. */
11583 if (recog_data.operand_type[opno] != OP_IN)
11586 /* If this alternative is a memory reference, then any mention
11587 of constants in this alternative is really to fool reload
11588 into allowing us to accept one there. We need to fix them up
11589 now so that we output the right code. */
11590 if (recog_op_alt[opno][which_alternative].memory_ok)
11592 rtx op = recog_data.operand[opno];
11594 if (CONSTANT_P (op))
11597 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11598 recog_data.operand_mode[opno], op);
11601 else if (GET_CODE (op) == MEM
11602 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11603 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11607 rtx cop = avoid_constant_pool_reference (op);
11609 /* Casting the address of something to a mode narrower
11610 than a word can cause avoid_constant_pool_reference()
11611 to return the pool reference itself. That's no good to
11612 us here. Let's just hope that we can use the
11613 constant pool value directly. */
11615 cop = get_pool_constant (XEXP (op, 0));
11617 push_minipool_fix (insn, address,
11618 recog_data.operand_loc[opno],
11619 recog_data.operand_mode[opno], cop);
11630 /* Convert instructions to their cc-clobbering variant if possible, since
11631 that allows us to use smaller encodings. */
11634 thumb2_reorg (void)
11639 INIT_REG_SET (&live);
11641 /* We are freeing block_for_insn in the toplev to keep compatibility
11642 with old MDEP_REORGS that are not CFG based. Recompute it now. */
11643 compute_bb_for_insn ();
11649 COPY_REG_SET (&live, DF_LR_OUT (bb));
11650 df_simulate_initialize_backwards (bb, &live);
11651 FOR_BB_INSNS_REVERSE (bb, insn)
11653 if (NONJUMP_INSN_P (insn)
11654 && !REGNO_REG_SET_P (&live, CC_REGNUM))
11656 rtx pat = PATTERN (insn);
11657 if (GET_CODE (pat) == SET
11658 && low_register_operand (XEXP (pat, 0), SImode)
11659 && thumb_16bit_operator (XEXP (pat, 1), SImode)
11660 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
11661 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
11663 rtx dst = XEXP (pat, 0);
11664 rtx src = XEXP (pat, 1);
11665 rtx op0 = XEXP (src, 0);
11666 if (rtx_equal_p (dst, op0)
11667 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
11669 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
11670 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
11671 rtvec vec = gen_rtvec (2, pat, clobber);
11672 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
11673 INSN_CODE (insn) = -1;
11677 if (NONDEBUG_INSN_P (insn))
11678 df_simulate_one_insn_backwards (bb, insn, &live);
11681 CLEAR_REG_SET (&live);
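/* A minimal sketch of the transformation above, with assumed register
   numbers:

   before: (set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
   after:  (parallel [(set (reg:SI 0)
                           (plus:SI (reg:SI 0) (reg:SI 1)))
                      (clobber (reg:CC CC_REGNUM))])

   With the CC clobber present, the flag-setting 16-bit encoding
   "adds r0, r0, r1" can be chosen instead of the 32-bit "add.w". */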
11684 /* GCC puts the pool in the wrong place for ARM, since we can only
11685 load addresses a limited distance around the pc. We do some
11686 special munging to move the constant pool values to the correct
11687 point in the code. */
11692 HOST_WIDE_INT address = 0;
11698 minipool_fix_head = minipool_fix_tail = NULL;
11700 /* The first insn must always be a note, or the code below won't
11701 scan it properly. */
11702 insn = get_insns ();
11703 gcc_assert (GET_CODE (insn) == NOTE);
11706 /* Scan all the insns and record the operands that will need fixing. */
11707 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11709 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11710 && (arm_cirrus_insn_p (insn)
11711 || GET_CODE (insn) == JUMP_INSN
11712 || arm_memory_load_p (insn)))
11713 cirrus_reorg (insn);
11715 if (GET_CODE (insn) == BARRIER)
11716 push_minipool_barrier (insn, address);
11717 else if (INSN_P (insn))
11721 note_invalid_constants (insn, address, true);
11722 address += get_attr_length (insn);
11724 /* If the insn is a vector jump, add the size of the table
11725 and skip the table. */
11726 if ((table = is_jump_table (insn)) != NULL)
11728 address += get_jump_table_size (table);
11734 fix = minipool_fix_head;
11736 /* Now scan the fixups and perform the required changes. */
11741 Mfix * last_added_fix;
11742 Mfix * last_barrier = NULL;
11745 /* Skip any further barriers before the next fix. */
11746 while (fix && GET_CODE (fix->insn) == BARRIER)
11749 /* No more fixes. */
11753 last_added_fix = NULL;
11755 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11757 if (GET_CODE (ftmp->insn) == BARRIER)
11759 if (ftmp->address >= minipool_vector_head->max_address)
11762 last_barrier = ftmp;
11764 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11767 last_added_fix = ftmp; /* Keep track of the last fix added. */
11770 /* If we found a barrier, drop back to that; any fixes that we
11771 could have reached but come after the barrier will now go in
11772 the next mini-pool. */
11773 if (last_barrier != NULL)
11775 /* Reduce the refcount for those fixes that won't go into this pool after all. */
11777 for (fdel = last_barrier->next;
11778 fdel && fdel != ftmp;
11781 fdel->minipool->refcount--;
11782 fdel->minipool = NULL;
11785 ftmp = last_barrier;
11789 /* ftmp is the first fix that we can't fit into this pool and
11790 there are no natural barriers that we could use. Insert a
11791 new barrier in the code somewhere between the previous
11792 fix and this one, and arrange to jump around it. */
11793 HOST_WIDE_INT max_address;
11795 /* The last item on the list of fixes must be a barrier, so
11796 we can never run off the end of the list of fixes without
11797 last_barrier being set. */
11800 max_address = minipool_vector_head->max_address;
11801 /* Check that there isn't another fix that is in range that
11802 we couldn't fit into this pool because the pool was
11803 already too large: we need to put the pool before such an
11804 instruction. The pool itself may come just after the
11805 fix because create_fix_barrier also allows space for a
11806 jump instruction. */
11807 if (ftmp->address < max_address)
11808 max_address = ftmp->address + 1;
11810 last_barrier = create_fix_barrier (last_added_fix, max_address);
11813 assign_minipool_offsets (last_barrier);
11817 if (GET_CODE (ftmp->insn) != BARRIER
11818 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11825 /* Scan over the fixes we have identified for this pool, fixing them
11826 up and adding the constants to the pool itself. */
11827 for (this_fix = fix; this_fix && ftmp != this_fix;
11828 this_fix = this_fix->next)
11829 if (GET_CODE (this_fix->insn) != BARRIER)
11832 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11833 minipool_vector_label),
11834 this_fix->minipool->offset);
11835 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11838 dump_minipool (last_barrier->insn);
11842 /* From now on we must synthesize any constants that we can't handle
11843 directly. This can happen if the RTL gets split during final
11844 instruction generation. */
11845 after_arm_reorg = 1;
11847 /* Free the minipool memory. */
11848 obstack_free (&minipool_obstack, minipool_startobj);
11851 /* Routines to output assembly language. */
11853 /* If the rtx is the correct value then return the string of the number.
11854 In this way we can ensure that valid double constants are generated even
11855 when cross compiling. */
11857 fp_immediate_constant (rtx x)
11862 if (!fp_consts_inited)
11865 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11866 for (i = 0; i < 8; i++)
11867 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11868 return strings_fp[i];
11870 gcc_unreachable ();
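/* The table searched above holds the eight immediate constants
   accepted directly by FPA instructions; assuming the usual
   initializer (elided here) these are 0.0, 1.0, 2.0, 3.0, 4.0, 5.0,
   0.5 and 10.0, returned as the strings "0" through "10". */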
11873 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11874 static const char *
11875 fp_const_from_val (REAL_VALUE_TYPE *r)
11879 if (!fp_consts_inited)
11882 for (i = 0; i < 8; i++)
11883 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11884 return strings_fp[i];
11886 gcc_unreachable ();
11889 /* Output the operands of a LDM/STM instruction to STREAM.
11890 MASK is the ARM register set mask of which only bits 0-15 are important.
11891 REG is the base register, either the frame pointer or the stack pointer,
11892 INSTR is the possibly suffixed load or store instruction.
11893 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11896 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11897 unsigned long mask, int rfe)
11900 bool not_first = FALSE;
11902 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11903 fputc ('\t', stream);
11904 asm_fprintf (stream, instr, reg);
11905 fputc ('{', stream);
11907 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11908 if (mask & (1 << i))
11911 fprintf (stream, ", ");
11913 asm_fprintf (stream, "%r", i);
11918 fprintf (stream, "}^\n");
11920 fprintf (stream, "}\n");
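/* A hypothetical call, assuming SP as the base register and a mask
   covering r4, r5 and lr:

   print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM,
                    (1 << 4) | (1 << 5) | (1 << LR_REGNUM), 0);

   emits:

   ldmfd sp!, {r4, r5, lr}  */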
11924 /* Output a FLDMD instruction to STREAM.
11925 BASE is the register containing the address.
11926 REG and COUNT specify the register range.
11927 Extra registers may be added to avoid hardware bugs.
11929 We output FLDMD even for ARMv5 VFP implementations. Although
11930 FLDMD is technically not supported until ARMv6, it is believed
11931 that all VFP implementations support its use in this context. */
11934 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11938 /* Workaround ARM10 VFPr1 bug. */
11939 if (count == 2 && !arm_arch6)
11946 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11947 load into multiple parts if we have to handle more than 16 registers. */
11950 vfp_output_fldmd (stream, base, reg, 16);
11951 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11955 fputc ('\t', stream);
11956 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11958 for (i = reg; i < reg + count; i++)
11961 fputs (", ", stream);
11962 asm_fprintf (stream, "d%d", i);
11964 fputs ("}\n", stream);
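/* Example output, assuming base == SP_REGNUM, reg == 8 and count == 3
   on an arm_arch6 target (so the ARM10 workaround above does not
   fire):

   fldmfdd sp!, {d8, d9, d10}  */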
11969 /* Output the assembly for a store multiple. */
11972 vfp_output_fstmd (rtx * operands)
11979 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11980 p = strlen (pattern);
11982 gcc_assert (GET_CODE (operands[1]) == REG);
11984 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11985 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11987 p += sprintf (&pattern[p], ", d%d", base + i);
11989 strcpy (&pattern[p], "}");
11991 output_asm_insn (pattern, operands);
11996 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11997 number of bytes pushed. */
12000 vfp_emit_fstmd (int base_reg, int count)
12007 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12008 register pairs are stored by a store multiple insn. We avoid this
12009 by pushing an extra pair. */
12010 if (count == 2 && !arm_arch6)
12012 if (base_reg == LAST_VFP_REGNUM - 3)
12017 /* FSTMD may not store more than 16 doubleword registers at once. Split
12018 larger stores into multiple parts (up to a maximum of two, in practice). */
12023 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
12025 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12026 saved += vfp_emit_fstmd (base_reg, 16);
12030 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12031 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12033 reg = gen_rtx_REG (DFmode, base_reg);
12036 XVECEXP (par, 0, 0)
12037 = gen_rtx_SET (VOIDmode,
12040 gen_rtx_PRE_MODIFY (Pmode,
12043 (stack_pointer_rtx,
12046 gen_rtx_UNSPEC (BLKmode,
12047 gen_rtvec (1, reg),
12048 UNSPEC_PUSH_MULT));
12050 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12051 plus_constant (stack_pointer_rtx, -(count * 8)));
12052 RTX_FRAME_RELATED_P (tmp) = 1;
12053 XVECEXP (dwarf, 0, 0) = tmp;
12055 tmp = gen_rtx_SET (VOIDmode,
12056 gen_frame_mem (DFmode, stack_pointer_rtx),
12058 RTX_FRAME_RELATED_P (tmp) = 1;
12059 XVECEXP (dwarf, 0, 1) = tmp;
12061 for (i = 1; i < count; i++)
12063 reg = gen_rtx_REG (DFmode, base_reg);
12065 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12067 tmp = gen_rtx_SET (VOIDmode,
12068 gen_frame_mem (DFmode,
12069 plus_constant (stack_pointer_rtx,
12072 RTX_FRAME_RELATED_P (tmp) = 1;
12073 XVECEXP (dwarf, 0, i + 1) = tmp;
12076 par = emit_insn (par);
12077 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12078 RTX_FRAME_RELATED_P (par) = 1;
12083 /* Emit a call instruction with pattern PAT. ADDR is the address of
12084 the call target. */
12087 arm_emit_call_insn (rtx pat, rtx addr)
12091 insn = emit_call_insn (pat);
12093 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12094 If the call might use such an entry, add a use of the PIC register
12095 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12096 if (TARGET_VXWORKS_RTP
12098 && GET_CODE (addr) == SYMBOL_REF
12099 && (SYMBOL_REF_DECL (addr)
12100 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12101 : !SYMBOL_REF_LOCAL_P (addr)))
12103 require_pic_register ();
12104 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12108 /* Output a 'call' insn. */
12110 output_call (rtx *operands)
12112 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12114 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12115 if (REGNO (operands[0]) == LR_REGNUM)
12117 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12118 output_asm_insn ("mov%?\t%0, %|lr", operands);
12121 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12123 if (TARGET_INTERWORK || arm_arch4t)
12124 output_asm_insn ("bx%?\t%0", operands);
12126 output_asm_insn ("mov%?\t%|pc, %0", operands);
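/* For example, a call through r2 on an ARMv4t interworking target
   comes out as:

   mov lr, pc
   bx r2

   while a plain ARMv4 target gets "mov pc, r2" for the final step; the
   explicit LR setup is what makes the callee's return work without
   BLX. */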
12131 /* Output a 'call' insn that is a reference in memory. This is
12132 disabled for ARMv5 and we prefer a blx instead because otherwise
12133 there's a significant performance overhead. */
12135 output_call_mem (rtx *operands)
12137 gcc_assert (!arm_arch5);
12138 if (TARGET_INTERWORK)
12140 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12141 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12142 output_asm_insn ("bx%?\t%|ip", operands);
12144 else if (regno_use_in (LR_REGNUM, operands[0]))
12146 /* LR is used in the memory address. We load the address in the
12147 first instruction. It's safe to use IP as the target of the
12148 load since the call will kill it anyway. */
12149 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12150 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12152 output_asm_insn ("bx%?\t%|ip", operands);
12154 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12158 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12159 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12166 /* Output a move from arm registers to an fpa register.
12167 OPERANDS[0] is an fpa register.
12168 OPERANDS[1] is the first register of an arm register pair. */
12170 output_mov_long_double_fpa_from_arm (rtx *operands)
12172 int arm_reg0 = REGNO (operands[1]);
12175 gcc_assert (arm_reg0 != IP_REGNUM);
12177 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12178 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12179 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12181 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12182 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12187 /* Output a move from an fpa register to arm registers.
12188 OPERANDS[0] is the first register of an arm register pair.
12189 OPERANDS[1] is an fpa register. */
12191 output_mov_long_double_arm_from_fpa (rtx *operands)
12193 int arm_reg0 = REGNO (operands[0]);
12196 gcc_assert (arm_reg0 != IP_REGNUM);
12198 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12199 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12200 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12202 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12203 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12207 /* Output a move from arm registers to arm registers of a long double.
12208 OPERANDS[0] is the destination.
12209 OPERANDS[1] is the source. */
12211 output_mov_long_double_arm_from_arm (rtx *operands)
12213 /* We have to be careful here because the two might overlap. */
12214 int dest_start = REGNO (operands[0]);
12215 int src_start = REGNO (operands[1]);
12219 if (dest_start < src_start)
12221 for (i = 0; i < 3; i++)
12223 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12224 ops[1] = gen_rtx_REG (SImode, src_start + i);
12225 output_asm_insn ("mov%?\t%0, %1", ops);
12230 for (i = 2; i >= 0; i--)
12232 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12233 ops[1] = gen_rtx_REG (SImode, src_start + i);
12234 output_asm_insn ("mov%?\t%0, %1", ops);
12242 arm_emit_movpair (rtx dest, rtx src)
12244 /* If the src is an immediate, simplify it. */
12245 if (CONST_INT_P (src))
12247 HOST_WIDE_INT val = INTVAL (src);
12248 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12249 if ((val >> 16) & 0x0000ffff)
12250 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12252 GEN_INT ((val >> 16) & 0x0000ffff));
12255 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12256 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
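/* A sketch of the immediate path above, registers assumed: for
   arm_emit_movpair (dest, GEN_INT (0x12345678)) the two sets emitted
   correspond, on a target with movw/movt, to:

   movw r0, #0x5678
   movt r0, #0x1234

   and the second set is skipped entirely when (val >> 16) & 0xffff is
   zero, saving an insn for small constants. */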
12259 /* Output a move from arm registers to an fpa register.
12260 OPERANDS[0] is an fpa register.
12261 OPERANDS[1] is the first register of an arm register pair. */
12263 output_mov_double_fpa_from_arm (rtx *operands)
12265 int arm_reg0 = REGNO (operands[1]);
12268 gcc_assert (arm_reg0 != IP_REGNUM);
12270 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12271 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12272 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12273 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12277 /* Output a move from an fpa register to arm registers.
12278 OPERANDS[0] is the first register of an arm register pair.
12279 OPERANDS[1] is an fpa register. */
12281 output_mov_double_arm_from_fpa (rtx *operands)
12283 int arm_reg0 = REGNO (operands[0]);
12286 gcc_assert (arm_reg0 != IP_REGNUM);
12288 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12289 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12290 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12291 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12295 /* Output a move between double words.
12296 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12297 or MEM<-REG and all MEMs must be offsettable addresses. */
12299 output_move_double (rtx *operands)
12301 enum rtx_code code0 = GET_CODE (operands[0]);
12302 enum rtx_code code1 = GET_CODE (operands[1]);
12307 unsigned int reg0 = REGNO (operands[0]);
12309 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12311 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12313 switch (GET_CODE (XEXP (operands[1], 0)))
12317 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12318 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12320 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12324 gcc_assert (TARGET_LDRD);
12325 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12330 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12332 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12337 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12339 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12343 gcc_assert (TARGET_LDRD);
12344 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12349 /* Autoincrement addressing modes should never have overlapping
12350 base and destination registers, and overlapping index registers
12351 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
12353 otherops[0] = operands[0];
12354 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12355 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12357 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12359 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12361 /* Registers overlap so split out the increment. */
12362 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12363 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12367 /* Use a single insn if we can.
12368 FIXME: IWMMXT allows offsets larger than ldrd can
12369 handle, fix these up with a pair of ldr. */
12371 || GET_CODE (otherops[2]) != CONST_INT
12372 || (INTVAL (otherops[2]) > -256
12373 && INTVAL (otherops[2]) < 256))
12374 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12377 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12378 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12384 /* Use a single insn if we can.
12385 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12386 fix these up with a pair of ldr. */
12388 || GET_CODE (otherops[2]) != CONST_INT
12389 || (INTVAL (otherops[2]) > -256
12390 && INTVAL (otherops[2]) < 256))
12391 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12394 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12395 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12402 /* We might be able to use ldrd %0, %1 here. However the range is
12403 different to ldr/adr, and it is broken on some ARMv7-M
12404 implementations. */
12405 /* Use the second register of the pair to avoid problematic overlap. */
12407 otherops[1] = operands[1];
12408 output_asm_insn ("adr%?\t%0, %1", otherops);
12409 operands[1] = otherops[0];
12411 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12413 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12416 /* ??? This needs checking for thumb2. */
12418 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12419 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12421 otherops[0] = operands[0];
12422 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12423 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12425 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12427 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12429 switch ((int) INTVAL (otherops[2]))
12432 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12437 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12442 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12446 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12447 operands[1] = otherops[0];
12449 && (GET_CODE (otherops[2]) == REG
12451 || (GET_CODE (otherops[2]) == CONST_INT
12452 && INTVAL (otherops[2]) > -256
12453 && INTVAL (otherops[2]) < 256)))
12455 if (reg_overlap_mentioned_p (operands[0],
12459 /* Swap base and index registers over to
12460 avoid a conflict. */
12462 otherops[1] = otherops[2];
12465 /* If both registers conflict, it will usually
12466 have been fixed by a splitter. */
12467 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12468 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12470 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12471 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12475 otherops[0] = operands[0];
12476 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12481 if (GET_CODE (otherops[2]) == CONST_INT)
12483 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12484 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12486 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12489 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12492 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12495 return "ldr%(d%)\t%0, [%1]";
12497 return "ldm%(ia%)\t%1, %M0";
12501 otherops[1] = adjust_address (operands[1], SImode, 4);
12502 /* Take care of overlapping base/data reg. */
12503 if (reg_mentioned_p (operands[0], operands[1]))
12505 output_asm_insn ("ldr%?\t%0, %1", otherops);
12506 output_asm_insn ("ldr%?\t%0, %1", operands);
12510 output_asm_insn ("ldr%?\t%0, %1", operands);
12511 output_asm_insn ("ldr%?\t%0, %1", otherops);
12518 /* Constraints should ensure this. */
12519 gcc_assert (code0 == MEM && code1 == REG);
12520 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12522 switch (GET_CODE (XEXP (operands[0], 0)))
12526 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12528 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12532 gcc_assert (TARGET_LDRD);
12533 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12538 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12540 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12545 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12547 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12551 gcc_assert (TARGET_LDRD);
12552 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12557 otherops[0] = operands[1];
12558 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12559 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12561 /* IWMMXT allows offsets larger than strd can handle,
12562 fix these up with a pair of str. */
12564 && GET_CODE (otherops[2]) == CONST_INT
12565 && (INTVAL(otherops[2]) <= -256
12566 || INTVAL(otherops[2]) >= 256))
12568 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12570 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
12571 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12575 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12576 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
12579 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12580 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12582 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12586 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12587 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12589 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12592 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12598 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12604 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12609 && (GET_CODE (otherops[2]) == REG
12611 || (GET_CODE (otherops[2]) == CONST_INT
12612 && INTVAL (otherops[2]) > -256
12613 && INTVAL (otherops[2]) < 256)))
12615 otherops[0] = operands[1];
12616 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12617 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12623 otherops[0] = adjust_address (operands[0], SImode, 4);
12624 otherops[1] = operands[1];
12625 output_asm_insn ("str%?\t%1, %0", operands);
12626 output_asm_insn ("str%?\t%H1, %0", otherops);
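/* Two illustrative cases for the load side above, assuming the
   register pair r4/r5 and base register r6:

   plain REG address, TARGET_LDRD:  ldrd r4, [r6]
   plain REG address, no LDRD:      ldmia r6, {r4, r5}

   The store side mirrors this with strd/stmia, modulo the special
   cases for overlapping registers and out-of-range PRE/POST_MODIFY
   offsets handled above. */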
12633 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12634 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12637 output_move_quad (rtx *operands)
12639 if (REG_P (operands[0]))
12641 /* Load, or reg->reg move. */
12643 if (MEM_P (operands[1]))
12645 switch (GET_CODE (XEXP (operands[1], 0)))
12648 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12653 output_asm_insn ("adr%?\t%0, %1", operands);
12654 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12658 gcc_unreachable ();
12666 gcc_assert (REG_P (operands[1]));
12668 dest = REGNO (operands[0]);
12669 src = REGNO (operands[1]);
12671 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
12674 for (i = 0; i < 4; i++)
12676 ops[0] = gen_rtx_REG (SImode, dest + i);
12677 ops[1] = gen_rtx_REG (SImode, src + i);
12678 output_asm_insn ("mov%?\t%0, %1", ops);
12681 for (i = 3; i >= 0; i--)
12683 ops[0] = gen_rtx_REG (SImode, dest + i);
12684 ops[1] = gen_rtx_REG (SImode, src + i);
12685 output_asm_insn ("mov%?\t%0, %1", ops);
12691 gcc_assert (MEM_P (operands[0]));
12692 gcc_assert (REG_P (operands[1]));
12693 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12695 switch (GET_CODE (XEXP (operands[0], 0)))
12698 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12702 gcc_unreachable ();
12709 /* Output a VFP load or store instruction. */
12712 output_move_vfp (rtx *operands)
12714 rtx reg, mem, addr, ops[2];
12715 int load = REG_P (operands[0]);
12716 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12717 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12720 enum machine_mode mode;
12722 reg = operands[!load];
12723 mem = operands[load];
12725 mode = GET_MODE (reg);
12727 gcc_assert (REG_P (reg));
12728 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12729 gcc_assert (mode == SFmode
12733 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12734 gcc_assert (MEM_P (mem));
12736 addr = XEXP (mem, 0);
12738 switch (GET_CODE (addr))
12741 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12742 ops[0] = XEXP (addr, 0);
12747 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12748 ops[0] = XEXP (addr, 0);
12753 templ = "f%s%c%%?\t%%%s0, %%1%s";
12759 sprintf (buff, templ,
12760 load ? "ld" : "st",
12763 integer_p ? "\t%@ int" : "");
12764 output_asm_insn (buff, ops);
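/* Example expansions of the template above, register numbers assumed
   and the condition field empty:

   DFmode load from [r0]:  fldd d8, [r0]
   SFmode store to [r0]:   fsts s15, [r0]

   with an "@ int" comment appended when an integer mode is being held
   in a VFP register. */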
12769 /* Output a Neon quad-word load or store, or a load or store for
12770 larger structure modes.
12772 WARNING: The ordering of elements is weird in big-endian mode,
12773 because we use VSTM, as required by the EABI. GCC RTL defines
12774 element ordering based on in-memory order. This can differ
12775 from the architectural ordering of elements within a NEON register.
12776 The intrinsics defined in arm_neon.h use the NEON register element
12777 ordering, not the GCC RTL element ordering.
12779 For example, the in-memory ordering of a big-endian quadword
12780 vector with 16-bit elements when stored from register pair {d0,d1}
12781 will be (lowest address first, d0[N] is NEON register element N):
12783 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12785 When necessary, quadword registers (dN, dN+1) are moved to ARM
12786 registers from rN in the order:
12788 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12790 So that STM/LDM can be used on vectors in ARM registers, and the
12791 same memory layout will result as if VSTM/VLDM were used. */
12794 output_move_neon (rtx *operands)
12796 rtx reg, mem, addr, ops[2];
12797 int regno, load = REG_P (operands[0]);
12800 enum machine_mode mode;
12802 reg = operands[!load];
12803 mem = operands[load];
12805 mode = GET_MODE (reg);
12807 gcc_assert (REG_P (reg));
12808 regno = REGNO (reg);
12809 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12810 || NEON_REGNO_OK_FOR_QUAD (regno));
12811 gcc_assert (VALID_NEON_DREG_MODE (mode)
12812 || VALID_NEON_QREG_MODE (mode)
12813 || VALID_NEON_STRUCT_MODE (mode));
12814 gcc_assert (MEM_P (mem));
12816 addr = XEXP (mem, 0);
12818 /* Strip off const from addresses like (const (plus (...))). */
12819 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12820 addr = XEXP (addr, 0);
12822 switch (GET_CODE (addr))
12825 templ = "v%smia%%?\t%%0!, %%h1";
12826 ops[0] = XEXP (addr, 0);
12831 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12832 templ = "v%smdb%%?\t%%0!, %%h1";
12833 ops[0] = XEXP (addr, 0);
12838 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12839 gcc_unreachable ();
12844 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12847 for (i = 0; i < nregs; i++)
12849 /* We're only using DImode here because it's a convenient size. */
12850 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12851 ops[1] = adjust_address (mem, DImode, 8 * i);
12852 if (reg_overlap_mentioned_p (ops[0], mem))
12854 gcc_assert (overlap == -1);
12859 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12860 output_asm_insn (buff, ops);
12865 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12866 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12867 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12868 output_asm_insn (buff, ops);
12875 templ = "v%smia%%?\t%%m0, %%h1";
12880 sprintf (buff, templ, load ? "ld" : "st");
12881 output_asm_insn (buff, ops);
12886 /* Compute and return the length of neon_mov<mode>, where <mode> is
12887 one of VSTRUCT modes: EI, OI, CI or XI. */
12889 arm_attr_length_move_neon (rtx insn)
12891 rtx reg, mem, addr;
12893 enum machine_mode mode;
12895 extract_insn_cached (insn);
12897 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
12899 mode = GET_MODE (recog_data.operand[0]);
12910 gcc_unreachable ();
12914 load = REG_P (recog_data.operand[0]);
12915 reg = recog_data.operand[!load];
12916 mem = recog_data.operand[load];
12918 gcc_assert (MEM_P (mem));
12920 mode = GET_MODE (reg);
12921 addr = XEXP (mem, 0);
12923 /* Strip off const from addresses like (const (plus (...))). */
12924 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12925 addr = XEXP (addr, 0);
12927 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
12929 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12936 /* Output an ADD r, s, #n where n may be too big for one instruction.
12937 If adding zero to one register, output nothing. */
12939 output_add_immediate (rtx *operands)
12941 HOST_WIDE_INT n = INTVAL (operands[2]);
12943 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12946 output_multi_immediate (operands,
12947 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12950 output_multi_immediate (operands,
12951 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12958 /* Output a multiple immediate operation.
12959 OPERANDS is the vector of operands referred to in the output patterns.
12960 INSTR1 is the output pattern to use for the first constant.
12961 INSTR2 is the output pattern to use for subsequent constants.
12962 IMMED_OP is the index of the constant slot in OPERANDS.
12963 N is the constant value. */
12964 static const char *
12965 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12966 int immed_op, HOST_WIDE_INT n)
12968 #if HOST_BITS_PER_WIDE_INT > 32
12974 /* Quick and easy output. */
12975 operands[immed_op] = const0_rtx;
12976 output_asm_insn (instr1, operands);
12981 const char * instr = instr1;
12983 /* Note that n is never zero here (which would give no output). */
12984 for (i = 0; i < 32; i += 2)
12988 operands[immed_op] = GEN_INT (n & (255 << i));
12989 output_asm_insn (instr, operands);
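/* A worked example of the splitting above, register numbers assumed:
   output_add_immediate with n = 0x10003 (not a valid single ARM
   immediate) walks the byte-sized chunks at even bit positions and
   emits

   add r0, r1, #3
   add r0, r0, #65536

   using INSTR1 against the source register first and INSTR2 against
   the accumulated result thereafter. */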
12999 /* Return the name of a shifter operation. */
13000 static const char *
13001 arm_shift_nmem (enum rtx_code code)
13006 return ARM_LSL_NAME;
13022 /* Return the appropriate ARM instruction for the operation code.
13023 The returned result should not be overwritten. OP is the rtx of the
13024 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13027 arithmetic_instr (rtx op, int shift_first_arg)
13029 switch (GET_CODE (op))
13035 return shift_first_arg ? "rsb" : "sub";
13050 return arm_shift_nmem (GET_CODE (op));
13053 gcc_unreachable ();
13057 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13058 for the operation code. The returned result should not be overwritten.
13059 OP is the rtx code of the shift.
13060 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant shift. */
13062 static const char *
13063 shift_op (rtx op, HOST_WIDE_INT *amountp)
13066 enum rtx_code code = GET_CODE (op);
13068 switch (GET_CODE (XEXP (op, 1)))
13076 *amountp = INTVAL (XEXP (op, 1));
13080 gcc_unreachable ();
13086 gcc_assert (*amountp != -1);
13087 *amountp = 32 - *amountp;
13090 /* Fall through. */
13096 mnem = arm_shift_nmem (code);
13100 /* We never have to worry about the amount being other than a
13101 power of 2, since this case can never be reloaded from a reg. */
13102 gcc_assert (*amountp != -1);
13103 *amountp = int_log2 (*amountp);
13104 return ARM_LSL_NAME;
13107 gcc_unreachable ();
13110 if (*amountp != -1)
13112 /* This is not 100% correct, but follows from the desire to merge
13113 multiplication by a power of 2 with the recognizer for a
13114 shift. >=32 is not a valid shift for "lsl", so we must try and
13115 output a shift that produces the correct arithmetical result.
13116 Using lsr #32 is identical except for the fact that the carry bit
13117 is not set correctly if we set the flags; but we never use the
13118 carry bit from such an operation, so we can ignore that. */
13119 if (code == ROTATERT)
13120 /* Rotate is just modulo 32. */
13122 else if (*amountp != (*amountp & 31))
13124 if (code == ASHIFT)
13129 /* Shifts of 0 are no-ops. */
13137 /* Obtain the shift from the POWER of two. */
13139 static HOST_WIDE_INT
13140 int_log2 (HOST_WIDE_INT power)
13142 HOST_WIDE_INT shift = 0;
13144 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13146 gcc_assert (shift <= 31);
13153 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13154 because /bin/as is horribly restrictive. The judgement about
13155 whether or not each character is 'printable' (and can be output as
13156 is) or not (and must be printed with an octal escape) must be made
13157 with reference to the *host* character set -- the situation is
13158 similar to that discussed in the comments above pp_c_char in
13159 c-pretty-print.c. */
13161 #define MAX_ASCII_LEN 51
13164 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13167 int len_so_far = 0;
13169 fputs ("\t.ascii\t\"", stream);
13171 for (i = 0; i < len; i++)
13175 if (len_so_far >= MAX_ASCII_LEN)
13177 fputs ("\"\n\t.ascii\t\"", stream);
13183 if (c == '\\' || c == '\"')
13185 putc ('\\', stream);
13193 fprintf (stream, "\\%03o", c);
13198 fputs ("\"\n", stream);
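/* Example: for the four input bytes 'O', 'K', '"', '\n' the function
   emits

   .ascii "OK\"\012"

   escaping the quote with a backslash and the unprintable newline as a
   three-digit octal escape, and restarting the directive whenever
   MAX_ASCII_LEN characters have been written. */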
13201 /* Compute the register save mask for registers 0 through 12
13202 inclusive. This code is used by arm_compute_save_reg_mask. */
13204 static unsigned long
13205 arm_compute_save_reg0_reg12_mask (void)
13207 unsigned long func_type = arm_current_func_type ();
13208 unsigned long save_reg_mask = 0;
13211 if (IS_INTERRUPT (func_type))
13213 unsigned int max_reg;
13214 /* Interrupt functions must not corrupt any registers,
13215 even call clobbered ones. If this is a leaf function
13216 we can just examine the registers used by the RTL, but
13217 otherwise we have to assume that whatever function is
13218 called might clobber anything, and so we have to save
13219 all the call-clobbered registers as well. */
13220 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13221 /* FIQ handlers have registers r8 - r12 banked, so
13222 we only need to check r0 - r7. Normal ISRs only
13223 bank r14 and r15, so we must check up to r12.
13224 r13 is the stack pointer which is always preserved,
13225 so we do not need to consider it here. */
13230 for (reg = 0; reg <= max_reg; reg++)
13231 if (df_regs_ever_live_p (reg)
13232 || (! current_function_is_leaf && call_used_regs[reg]))
13233 save_reg_mask |= (1 << reg);
13235 /* Also save the pic base register if necessary. */
13237 && !TARGET_SINGLE_PIC_BASE
13238 && arm_pic_register != INVALID_REGNUM
13239 && crtl->uses_pic_offset_table)
13240 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13242 else if (IS_VOLATILE(func_type))
13244 /* For noreturn functions we historically omitted register saves
13245 altogether. However this really messes up debugging. As a
13246 compromise save just the frame pointers. Combined with the link
13247 register saved elsewhere this should be sufficient to get a backtrace. */
13249 if (frame_pointer_needed)
13250 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13251 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13252 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13253 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13254 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13258 /* In the normal case we only need to save those registers
13259 which are call saved and which are used by this function. */
13260 for (reg = 0; reg <= 11; reg++)
13261 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13262 save_reg_mask |= (1 << reg);
13264 /* Handle the frame pointer as a special case. */
13265 if (frame_pointer_needed)
13266 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13268 /* If we aren't loading the PIC register,
13269 don't stack it even though it may be live. */
13271 && !TARGET_SINGLE_PIC_BASE
13272 && arm_pic_register != INVALID_REGNUM
13273 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13274 || crtl->uses_pic_offset_table))
13275 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13277 /* The prologue will copy SP into R0, so save it. */
13278 if (IS_STACKALIGN (func_type))
13279 save_reg_mask |= 1;
13282 /* Save registers so the exception handler can modify them. */
13283 if (crtl->calls_eh_return)
13289 reg = EH_RETURN_DATA_REGNO (i);
13290 if (reg == INVALID_REGNUM)
13292 save_reg_mask |= 1 << reg;
13296 return save_reg_mask;
13300 /* Compute the number of bytes used to store the static chain register on the
13301 stack, above the stack frame. We need to know this accurately to get the
13302 alignment of the rest of the stack frame correct. */
13304 static int arm_compute_static_chain_stack_bytes (void)
13306 unsigned long func_type = arm_current_func_type ();
13307 int static_chain_stack_bytes = 0;
13309 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13310 && IS_NESTED (func_type)
13311 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13312 static_chain_stack_bytes = 4;
13314 return static_chain_stack_bytes;
13318 /* Compute a bit mask of which registers need to be
13319 saved on the stack for the current function.
13320 This is used by arm_get_frame_offsets, which may add extra registers. */
13322 static unsigned long
13323 arm_compute_save_reg_mask (void)
13325 unsigned int save_reg_mask = 0;
13326 unsigned long func_type = arm_current_func_type ();
13329 if (IS_NAKED (func_type))
13330 /* This should never really happen. */
13333 /* If we are creating a stack frame, then we must save the frame pointer,
13334 IP (which will hold the old stack pointer), LR and the PC. */
13335 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13337 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13340 | (1 << PC_REGNUM);
13342 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13344 /* Decide if we need to save the link register.
13345 Interrupt routines have their own banked link register,
13346 so they never need to save it.
13347 Otherwise if we do not use the link register we do not need to save
13348 it. If we are pushing other registers onto the stack however, we
13349 can save an instruction in the epilogue by pushing the link register
13350 now and then popping it back into the PC. This incurs extra memory
13351 accesses though, so we only do it when optimizing for size, and only
13352 if we know that we will not need a fancy return sequence. */
13353 if (df_regs_ever_live_p (LR_REGNUM)
13356 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13357 && !crtl->calls_eh_return))
13358 save_reg_mask |= 1 << LR_REGNUM;
13360 if (cfun->machine->lr_save_eliminated)
13361 save_reg_mask &= ~ (1 << LR_REGNUM);
13363 if (TARGET_REALLY_IWMMXT
13364 && ((bit_count (save_reg_mask)
13365 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13366 arm_compute_static_chain_stack_bytes())
13369 /* The total number of registers that are going to be pushed
13370 onto the stack is odd. We need to ensure that the stack
13371 is 64-bit aligned before we start to save iWMMXt registers,
13372 and also before we start to create locals. (A local variable
13373 might be a double or long long which we will load/store using
13374 an iWMMXt instruction). Therefore we need to push another
13375 ARM register, so that the stack will be 64-bit aligned. We
13376 try to avoid using the arg registers (r0 - r3) as they might be
13377 used to pass values in a tail call. */
13378 for (reg = 4; reg <= 12; reg++)
13379 if ((save_reg_mask & (1 << reg)) == 0)
13383 save_reg_mask |= (1 << reg);
13386 cfun->machine->sibcall_blocked = 1;
13387 save_reg_mask |= (1 << 3);
13391 /* We may need to push an additional register for use initializing the
13392 PIC base register. */
13393 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13394 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13396 reg = thumb_find_work_register (1 << 4);
13397 if (!call_used_regs[reg])
13398 save_reg_mask |= (1 << reg);
13401 return save_reg_mask;
13405 /* Compute a bit mask of which registers need to be
13406 saved on the stack for the current function. */
13407 static unsigned long
13408 thumb1_compute_save_reg_mask (void)
13410 unsigned long mask;
13414 for (reg = 0; reg < 12; reg ++)
13415 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13419 && !TARGET_SINGLE_PIC_BASE
13420 && arm_pic_register != INVALID_REGNUM
13421 && crtl->uses_pic_offset_table)
13422 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13424 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13425 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13426 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13428 /* LR will also be pushed if any lo regs are pushed. */
13429 if (mask & 0xff || thumb_force_lr_save ())
13430 mask |= (1 << LR_REGNUM);
13432 /* Make sure we have a low work register if we need one.
13433 We will need one if we are going to push a high register,
13434 but we are not currently intending to push a low register. */
13435 if ((mask & 0xff) == 0
13436 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13438 /* Use thumb_find_work_register to choose which register
13439 we will use. If the register is live then we will
13440 have to push it. Use LAST_LO_REGNUM as our fallback
13441 choice for the register to select. */
13442 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13443 /* Make sure the register returned by thumb_find_work_register is
13444 not part of the return value. */
13445 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13446 reg = LAST_LO_REGNUM;
13448 if (! call_used_regs[reg])
13452 /* The 504 below is 8 bytes less than 512 because there are two possible
13453 alignment words. We can't tell here if they will be present or not so we
13454 have to play it safe and assume that they are. */
13455 if ((CALLER_INTERWORKING_SLOT_SIZE +
13456 ROUND_UP_WORD (get_frame_size ()) +
13457 crtl->outgoing_args_size) >= 504)
13459 /* This is the same as the code in thumb1_expand_prologue() which
13460 determines which register to use for stack decrement. */
13461 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13462 if (mask & (1 << reg))
13465 if (reg > LAST_LO_REGNUM)
13467 /* Make sure we have a register available for stack decrement. */
13468 mask |= 1 << LAST_LO_REGNUM;
13476 /* Return the number of bytes required to save VFP registers. */
13478 arm_get_vfp_saved_size (void)
13480 unsigned int regno;
13485 /* Space for saved VFP registers. */
13486 if (TARGET_HARD_FLOAT && TARGET_VFP)
13489 for (regno = FIRST_VFP_REGNUM;
13490 regno < LAST_VFP_REGNUM;
13493 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13494 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13498 /* Workaround ARM10 VFPr1 bug. */
13499 if (count == 2 && !arm_arch6)
13501 saved += count * 8;
13510 if (count == 2 && !arm_arch6)
13512 saved += count * 8;
13519 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13520 everything bar the final return instruction. */
13522 output_return_instruction (rtx operand, int really_return, int reverse)
13524 char conditional[10];
13527 unsigned long live_regs_mask;
13528 unsigned long func_type;
13529 arm_stack_offsets *offsets;
13531 func_type = arm_current_func_type ();
13533 if (IS_NAKED (func_type))
13536 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13538 /* If this function was declared non-returning, and we have
13539 found a tail call, then we have to trust that the called
13540 function won't return. */
13545 /* Otherwise, trap an attempted return by aborting. */
13547 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13549 assemble_external_libcall (ops[1]);
13550 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13556 gcc_assert (!cfun->calls_alloca || really_return);
13558 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13560 cfun->machine->return_used_this_function = 1;
13562 offsets = arm_get_frame_offsets ();
13563 live_regs_mask = offsets->saved_regs_mask;
13565 if (live_regs_mask)
13567 const char * return_reg;
13569 /* If we do not have any special requirements for function exit
13570 (e.g. interworking) then we can load the return address
13571 directly into the PC. Otherwise we must load it into LR. */
13573 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13574 return_reg = reg_names[PC_REGNUM];
13576 return_reg = reg_names[LR_REGNUM];
13578 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13580 /* There are three possible reasons for the IP register
13581 being saved. 1) a stack frame was created, in which case
13582 IP contains the old stack pointer, or 2) an ISR routine
13583 corrupted it, or 3) it was saved to align the stack on
13584 iWMMXt. In case 1, restore IP into SP, otherwise just restore IP. */
13586 if (frame_pointer_needed)
13588 live_regs_mask &= ~ (1 << IP_REGNUM);
13589 live_regs_mask |= (1 << SP_REGNUM);
13592 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13595 /* On some ARM architectures it is faster to use LDR rather than
13596 LDM to load a single register. On other architectures, the
13597 cost is the same. In 26 bit mode, or for exception handlers,
13598 we have to use LDM to load the PC so that the CPSR is also
13600 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13601 if (live_regs_mask == (1U << reg))
13604 if (reg <= LAST_ARM_REGNUM
13605 && (reg != LR_REGNUM
13607 || ! IS_INTERRUPT (func_type)))
13609 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13610 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13617 /* Generate the load multiple instruction to restore the
13618 registers. Note we can get here, even if
13619 frame_pointer_needed is true, but only if sp already
13620 points to the base of the saved core registers. */
13621 if (live_regs_mask & (1 << SP_REGNUM))
13623 unsigned HOST_WIDE_INT stack_adjust;
13625 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13626 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13628 if (stack_adjust && arm_arch5 && TARGET_ARM)
13629 if (TARGET_UNIFIED_ASM)
13630 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13632 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13635 /* If we can't use ldmib (SA110 bug),
13636 then try to pop r3 instead. */
13638 live_regs_mask |= 1 << 3;
13640 if (TARGET_UNIFIED_ASM)
13641 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13643 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13647 if (TARGET_UNIFIED_ASM)
13648 sprintf (instr, "pop%s\t{", conditional);
13650 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13652 p = instr + strlen (instr);
13654 for (reg = 0; reg <= SP_REGNUM; reg++)
13655 if (live_regs_mask & (1 << reg))
13657 int l = strlen (reg_names[reg]);
13663 memcpy (p, ", ", 2);
13667 memcpy (p, "%|", 2);
13668 memcpy (p + 2, reg_names[reg], l);
13672 if (live_regs_mask & (1 << LR_REGNUM))
13674 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13675 /* If returning from an interrupt, restore the CPSR. */
13676 if (IS_INTERRUPT (func_type))
13683 output_asm_insn (instr, & operand);
13685 /* See if we need to generate an extra instruction to
13686 perform the actual function return. */
13688 && func_type != ARM_FT_INTERWORKED
13689 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13691 /* The return has already been handled
13692 by loading the LR into the PC. */
13699 switch ((int) ARM_FUNC_TYPE (func_type))
13703 /* ??? This is wrong for unified assembly syntax. */
13704 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13707 case ARM_FT_INTERWORKED:
13708 sprintf (instr, "bx%s\t%%|lr", conditional);
13711 case ARM_FT_EXCEPTION:
13712 /* ??? This is wrong for unified assembly syntax. */
13713 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13717 /* Use bx if it's available. */
13718 if (arm_arch5 || arm_arch4t)
13719 sprintf (instr, "bx%s\t%%|lr", conditional);
13721 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13725 output_asm_insn (instr, & operand);
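/* For a plain ARM function whose saved registers are r4 and lr, with
   really_return set and no interworking, the code above loads the
   return address straight into the PC:

   ldmfd sp!, {r4, pc}

   (or "pop {r4, pc}" with unified syntax), so no separate return
   instruction is needed afterwards. */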
13731 /* Write the function name into the code section, directly preceding
13732 the function prologue.
13734 Code will be output similar to this:
13736 .ascii "arm_poke_function_name", 0
13739 .word 0xff000000 + (t1 - t0)
13740 arm_poke_function_name
13742 stmfd sp!, {fp, ip, lr, pc}
13745 When performing a stack backtrace, code can inspect the value
13746 of 'pc' stored at 'fp' + 0. If the trace function then looks
13747 at location pc - 12 and the top 8 bits are set, then we know
13748 that there is a function name embedded immediately preceding this
13749 location, whose length is ((pc[-3]) & ~0xff000000).
13751 We assume that pc is declared as a pointer to an unsigned long.
13753 It is of no benefit to output the function name if we are assembling
13754 a leaf function. These function types will not contain a stack
13755 backtrace structure, therefore it is not possible to determine the
13756 function name.  */
13757 void
13758 arm_poke_function_name (FILE *stream, const char *name)
13760 unsigned long alignlength;
13761 unsigned long length;
13764 length = strlen (name) + 1;
13765 alignlength = ROUND_UP_WORD (length);
13767 ASM_OUTPUT_ASCII (stream, name, length);
13768 ASM_OUTPUT_ALIGN (stream, 2);
13769 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13770 assemble_aligned_integer (UNITS_PER_WORD, x);
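/* A minimal consumer-side sketch of the scheme above (not part of GCC;
   the function name is hypothetical).  PC is the saved program counter
   recovered from slot 0 of an APCS frame; pc[-3] is the marker word at
   pc - 12, and its low 24 bits hold the word-aligned name length:

     static const char *
     recover_function_name (const unsigned long *pc)
     {
       unsigned long marker = pc[-3];
       if ((marker & 0xff000000) != 0xff000000)
         return 0;
       return (const char *) pc - 12 - (marker & 0x00ffffff);
     }
*/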
13773 /* Place some comments into the assembler stream
13774 describing the current function. */
13776 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13778 unsigned long func_type;
13782 thumb1_output_function_prologue (f, frame_size);
13786 /* Sanity check. */
13787 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13789 func_type = arm_current_func_type ();
13791 switch ((int) ARM_FUNC_TYPE (func_type))
13792 {
13793 default:
13794 case ARM_FT_NORMAL:
13795 break;
13796 case ARM_FT_INTERWORKED:
13797 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13798 break;
13799 case ARM_FT_ISR:
13800 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13801 break;
13802 case ARM_FT_FIQ:
13803 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13804 break;
13805 case ARM_FT_EXCEPTION:
13806 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13807 break;
13808 }
13810 if (IS_NAKED (func_type))
13811 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13813 if (IS_VOLATILE (func_type))
13814 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13816 if (IS_NESTED (func_type))
13817 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13818 if (IS_STACKALIGN (func_type))
13819 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13821 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13822 crtl->args.size,
13823 crtl->args.pretend_args_size, frame_size);
13825 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13826 frame_pointer_needed,
13827 cfun->machine->uses_anonymous_args);
13829 if (cfun->machine->lr_save_eliminated)
13830 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13832 if (crtl->calls_eh_return)
13833 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
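/* Taken together, the annotations above produce a header such as the
   following (values illustrative only):

	@ Function supports interworking.
	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, uses_anonymous_args = 0
	@ link register save eliminated.  */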
13837 const char *
13838 arm_output_epilogue (rtx sibling)
13841 unsigned long saved_regs_mask;
13842 unsigned long func_type;
13843 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13844 frame that is $fp + 4 for a non-variadic function. */
13845 int floats_offset = 0;
13847 FILE * f = asm_out_file;
13848 unsigned int lrm_count = 0;
13849 int really_return = (sibling == NULL);
13851 arm_stack_offsets *offsets;
13853 /* If we have already generated the return instruction
13854 then it is futile to generate anything else. */
13855 if (use_return_insn (FALSE, sibling)
13856 && (cfun->machine->return_used_this_function != 0))
13859 func_type = arm_current_func_type ();
13861 if (IS_NAKED (func_type))
13862 /* Naked functions don't have epilogues. */
13865 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13869 /* A volatile function should never return. Call abort. */
13870 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13871 assemble_external_libcall (op);
13872 output_asm_insn ("bl\t%a0", &op);
13877 /* If we are throwing an exception, then we really must be doing a
13878 return, so we can't tail-call. */
13879 gcc_assert (!crtl->calls_eh_return || really_return);
13881 offsets = arm_get_frame_offsets ();
13882 saved_regs_mask = offsets->saved_regs_mask;
13885 lrm_count = bit_count (saved_regs_mask);
13887 floats_offset = offsets->saved_args;
13888 /* Compute how far away the floats will be. */
13889 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13890 if (saved_regs_mask & (1 << reg))
13891 floats_offset += 4;
13893 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13895 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13896 int vfp_offset = offsets->frame;
13898 if (TARGET_FPA_EMU2)
13900 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13901 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13903 floats_offset += 12;
13904 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13905 reg, FP_REGNUM, floats_offset - vfp_offset);
13910 start_reg = LAST_FPA_REGNUM;
13912 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13914 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13916 floats_offset += 12;
13918 /* We can't unstack more than four registers at once. */
13919 if (start_reg - reg == 3)
13921 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13922 reg, FP_REGNUM, floats_offset - vfp_offset);
13923 start_reg = reg - 1;
13928 if (reg != start_reg)
13929 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13930 reg + 1, start_reg - reg,
13931 FP_REGNUM, floats_offset - vfp_offset);
13932 start_reg = reg - 1;
13936 /* Just in case the last register checked also needs unstacking. */
13937 if (reg != start_reg)
13938 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13939 reg + 1, start_reg - reg,
13940 FP_REGNUM, floats_offset - vfp_offset);
13943 if (TARGET_HARD_FLOAT && TARGET_VFP)
13947 /* The fldmd insns do not have base+offset addressing
13948 modes, so we use IP to hold the address. */
13949 saved_size = arm_get_vfp_saved_size ();
13951 if (saved_size > 0)
13953 floats_offset += saved_size;
13954 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13955 FP_REGNUM, floats_offset - vfp_offset);
13957 start_reg = FIRST_VFP_REGNUM;
13958 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13960 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13961 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13963 if (start_reg != reg)
13964 vfp_output_fldmd (f, IP_REGNUM,
13965 (start_reg - FIRST_VFP_REGNUM) / 2,
13966 (reg - start_reg) / 2);
13967 start_reg = reg + 2;
13970 if (start_reg != reg)
13971 vfp_output_fldmd (f, IP_REGNUM,
13972 (start_reg - FIRST_VFP_REGNUM) / 2,
13973 (reg - start_reg) / 2);
13978 /* The frame pointer is guaranteed to be non-double-word aligned.
13979 This is because it is set to (old_stack_pointer - 4) and the
13980 old_stack_pointer was double word aligned. Thus the offset to
13981 the iWMMXt registers to be loaded must also be non-double-word
13982 sized, so that the resultant address *is* double-word aligned.
13983 We can ignore floats_offset since that was already included in
13984 the live_regs_mask. */
13985 lrm_count += (lrm_count % 2 ? 2 : 1);
13987 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13988 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13990 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13991 reg, FP_REGNUM, lrm_count * 4);
13996 /* saved_regs_mask should contain the IP, which at the time of stack
13997 frame generation actually contains the old stack pointer. So a
13998 quick way to unwind the stack is just pop the IP register directly
13999 into the stack pointer. */
14000 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14001 saved_regs_mask &= ~ (1 << IP_REGNUM);
14002 saved_regs_mask |= (1 << SP_REGNUM);
14004 /* There are two registers left in saved_regs_mask - LR and PC. We
14005 only need to restore the LR register (the return address), but to
14006 save time we can load it directly into the PC, unless we need a
14007 special function exit sequence, or we are not really returning. */
14008 if (really_return
14009 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14010 && !crtl->calls_eh_return)
14011 /* Delete the LR from the register mask, so that the LR on
14012 the stack is loaded into the PC in the register mask. */
14013 saved_regs_mask &= ~ (1 << LR_REGNUM);
14014 else
14015 saved_regs_mask &= ~ (1 << PC_REGNUM);
14017 /* We must use SP as the base register, because SP is one of the
14018 registers being restored. If an interrupt or page fault
14019 happens in the ldm instruction, the SP might or might not
14020 have been restored. That would be bad, as then SP will no
14021 longer indicate the safe area of stack, and we can get stack
14022 corruption. Using SP as the base register means that it will
14023 be reset correctly to the original value, should an interrupt
14024 occur. If the stack pointer already points at the right
14025 place, then omit the subtraction. */
14026 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14027 || cfun->calls_alloca)
14028 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14029 4 * bit_count (saved_regs_mask));
14030 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14032 if (IS_INTERRUPT (func_type))
14033 /* Interrupt handlers will have pushed the
14034 IP onto the stack, so restore it now. */
14035 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14039 /* This branch is executed for ARM mode (non-apcs frames) and
14040 Thumb-2 mode. Frame layout is essentially the same for those
14041 cases, except that in ARM mode frame pointer points to the
14042 first saved register, while in Thumb-2 mode the frame pointer points
14043 to the last saved register.
14045 It is possible to make the frame pointer point to the last saved
14046 register in both cases, and remove some conditionals below.
14047 That means the fp setup in the prologue would be just "mov fp, sp"
14048 and the sp restore in the epilogue would be just "mov sp, fp", whereas
14049 now we have to use add/sub in those cases.  However, the value
14050 of that would be marginal, as both mov and add/sub are 32-bit
14051 in ARM mode, and it would require extra conditionals
14052 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14053 (where the frame pointer is required to point at the first register)
14054 from the ARM-non-apcs-frame case.  Therefore, such a change is
14055 postponed until a real need arises.  */
14056 unsigned HOST_WIDE_INT amount;
14058 /* Restore stack pointer if necessary. */
14059 if (TARGET_ARM && frame_pointer_needed)
14061 operands[0] = stack_pointer_rtx;
14062 operands[1] = hard_frame_pointer_rtx;
14064 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14065 output_add_immediate (operands);
14069 if (frame_pointer_needed)
14071 /* For Thumb-2 restore sp from the frame pointer.
14072 Operand restrictions mean we have to increment FP, then copy
14073 it to SP.  */
14074 amount = offsets->locals_base - offsets->saved_regs;
14075 operands[0] = hard_frame_pointer_rtx;
14079 unsigned long count;
14080 operands[0] = stack_pointer_rtx;
14081 amount = offsets->outgoing_args - offsets->saved_regs;
14082 /* Pop call-clobbered registers if it avoids a
14083 separate stack adjustment.  */
14084 count = offsets->saved_regs - offsets->saved_args;
14087 && !crtl->calls_eh_return
14088 && bit_count(saved_regs_mask) * 4 == count
14089 && !IS_INTERRUPT (func_type)
14090 && !crtl->tail_call_emit)
14092 unsigned long mask;
14093 mask = (1 << (arm_size_return_regs() / 4)) - 1;
14094 mask ^= 0xf;
14095 mask &= ~saved_regs_mask;
14096 reg = 0;
14097 while (bit_count (mask) * 4 > amount)
14098 {
14099 while ((mask & (1 << reg)) == 0)
14100 reg++;
14101 mask &= ~(1 << reg);
14102 }
14103 if (bit_count (mask) * 4 == amount) {
14104 amount = 0;
14105 saved_regs_mask |= mask;
14112 operands[1] = operands[0];
14113 operands[2] = GEN_INT (amount);
14114 output_add_immediate (operands);
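/* A worked example of the folding above: if AMOUNT is 8 and the return
   value fits in r0 (so r1-r3 are free and unsaved), the loop trims the
   candidate set to {r2, r3}, and the epilogue then pops
   {r2, r3, r4, pc} instead of emitting "add sp, sp, #8" followed by
   "pop {r4, pc}"; the two junk loads absorb the stack adjustment.  */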
14116 if (frame_pointer_needed)
14117 asm_fprintf (f, "\tmov\t%r, %r\n",
14118 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14121 if (TARGET_FPA_EMU2)
14123 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14124 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14125 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14130 start_reg = FIRST_FPA_REGNUM;
14132 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14134 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14136 if (reg - start_reg == 3)
14138 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14139 start_reg, SP_REGNUM);
14140 start_reg = reg + 1;
14145 if (reg != start_reg)
14146 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14147 start_reg, reg - start_reg,
14150 start_reg = reg + 1;
14154 /* Just in case the last register checked also needs unstacking. */
14155 if (reg != start_reg)
14156 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14157 start_reg, reg - start_reg, SP_REGNUM);
14160 if (TARGET_HARD_FLOAT && TARGET_VFP)
14162 int end_reg = LAST_VFP_REGNUM + 1;
14164 /* Scan the registers in reverse order. We need to match
14165 any groupings made in the prologue and generate matching
14167 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14169 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14170 && (!df_regs_ever_live_p (reg + 1)
14171 || call_used_regs[reg + 1]))
14173 if (end_reg > reg + 2)
14174 vfp_output_fldmd (f, SP_REGNUM,
14175 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14176 (end_reg - (reg + 2)) / 2);
14180 if (end_reg > reg + 2)
14181 vfp_output_fldmd (f, SP_REGNUM, 0,
14182 (end_reg - (reg + 2)) / 2);
14186 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14187 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14188 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14190 /* If we can, restore the LR into the PC. */
14191 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14192 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14193 && !IS_STACKALIGN (func_type)
14195 && crtl->args.pretend_args_size == 0
14196 && saved_regs_mask & (1 << LR_REGNUM)
14197 && !crtl->calls_eh_return)
14199 saved_regs_mask &= ~ (1 << LR_REGNUM);
14200 saved_regs_mask |= (1 << PC_REGNUM);
14201 rfe = IS_INTERRUPT (func_type);
14206 /* Load the registers off the stack.  If we only have one register
14207 to load, use the LDR instruction - it is faster.  For Thumb-2,
14208 always use pop and the assembler will pick the best instruction.  */
14209 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14210 && !IS_INTERRUPT(func_type))
14212 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14214 else if (saved_regs_mask)
14216 if (saved_regs_mask & (1 << SP_REGNUM))
14217 /* Note - write back to the stack register is not enabled
14218 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14219 in the list of registers and if we add writeback the
14220 instruction becomes UNPREDICTABLE. */
14221 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14223 else if (TARGET_ARM)
14224 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14227 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
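/* For example, a function that stacked {r4, r5, lr} and can return
   directly restores with "ldmfd sp!, {r4, r5, pc}" in ARM mode or
   "pop {r4, r5, pc}" in Thumb-2, while a function whose only stacked
   register is LR uses the cheaper "ldr lr, [sp], #4".  */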
14230 if (crtl->args.pretend_args_size)
14232 /* Unwind the pre-pushed regs. */
14233 operands[0] = operands[1] = stack_pointer_rtx;
14234 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14235 output_add_immediate (operands);
14239 /* We may have already restored PC directly from the stack. */
14240 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14243 /* Stack adjustment for exception handler. */
14244 if (crtl->calls_eh_return)
14245 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14246 ARM_EH_STACKADJ_REGNUM);
14248 /* Generate the return instruction. */
14249 switch ((int) ARM_FUNC_TYPE (func_type))
14250 {
14251 case ARM_FT_ISR:
14252 case ARM_FT_FIQ:
14253 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14254 break;
14256 case ARM_FT_EXCEPTION:
14257 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14258 break;
14260 case ARM_FT_INTERWORKED:
14261 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14262 break;

14264 default:
14265 if (IS_STACKALIGN (func_type))
14267 /* See comment in arm_expand_prologue. */
14268 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14270 if (arm_arch5 || arm_arch4t)
14271 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14272 else
14273 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14281 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14282 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14284 arm_stack_offsets *offsets;
14290 /* Emit any call-via-reg trampolines that are needed for v4t support
14291 of call_reg and call_value_reg type insns. */
14292 for (regno = 0; regno < LR_REGNUM; regno++)
14294 rtx label = cfun->machine->call_via[regno];
14298 switch_to_section (function_section (current_function_decl));
14299 targetm.asm_out.internal_label (asm_out_file, "L",
14300 CODE_LABEL_NUMBER (label));
14301 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14305 /* ??? Probably not safe to set this here, since it assumes that a
14306 function will be emitted as assembly immediately after we generate
14307 RTL for it. This does not happen for inline functions. */
14308 cfun->machine->return_used_this_function = 0;
14310 else /* TARGET_32BIT */
14312 /* We need to take into account any stack-frame rounding. */
14313 offsets = arm_get_frame_offsets ();
14315 gcc_assert (!use_return_insn (FALSE, NULL)
14316 || (cfun->machine->return_used_this_function != 0)
14317 || offsets->saved_regs == offsets->outgoing_args
14318 || frame_pointer_needed);
14320 /* Reset the ARM-specific per-function variables. */
14321 after_arm_reorg = 0;
14325 /* Generate and emit an insn that we will recognize as a push_multi.
14326 Unfortunately, since this insn does not reflect very well the actual
14327 semantics of the operation, we need to annotate the insn for the benefit
14328 of DWARF2 frame unwind information. */
14329 static rtx
14330 emit_multi_reg_push (unsigned long mask)
14333 int num_dwarf_regs;
14337 int dwarf_par_index;
14340 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14341 if (mask & (1 << i))
14342 num_regs++;
14344 gcc_assert (num_regs && num_regs <= 16);
14346 /* We don't record the PC in the dwarf frame information. */
14347 num_dwarf_regs = num_regs;
14348 if (mask & (1 << PC_REGNUM))
14349 num_dwarf_regs--;
14351 /* For the body of the insn we are going to generate an UNSPEC in
14352 parallel with several USEs. This allows the insn to be recognized
14353 by the push_multi pattern in the arm.md file.
14355 The body of the insn looks something like this:

14357       (parallel [
14358           (set (mem:BLK (pre_modify:SI (reg:SI sp)
14359                                        (const_int:SI <num>)))
14360                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14361           (use (reg:SI XX))
14362           (use (reg:SI YY))
14363          ...
14364        ])
14366 For the frame note however, we try to be more explicit and actually
14367 show each register being stored into the stack frame, plus a (single)
14368 decrement of the stack pointer. We do it this way in order to be
14369 friendly to the stack unwinding code, which only wants to see a single
14370 stack decrement per instruction. The RTL we generate for the note looks
14371 something like this:

14373       (sequence [
14374          (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14375          (set (mem:SI (reg:SI sp)) (reg:SI r4))
14376          (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14377          (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14378           ...
14379        ])
14381 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14382 instead we'd have a parallel expression detailing all
14383 the stores to the various memory addresses so that debug
14384 information is more up-to-date. Remember however while writing
14385 this to take care of the constraints with the push instruction.
14387 Note also that this has to be taken care of for the VFP registers.
14389 For more see PR43399. */
14391 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14392 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14393 dwarf_par_index = 1;
14395 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14397 if (mask & (1 << i))
14399 reg = gen_rtx_REG (SImode, i);
14401 XVECEXP (par, 0, 0)
14402 = gen_rtx_SET (VOIDmode,
14405 gen_rtx_PRE_MODIFY (Pmode,
14408 (stack_pointer_rtx,
14411 gen_rtx_UNSPEC (BLKmode,
14412 gen_rtvec (1, reg),
14413 UNSPEC_PUSH_MULT));
14415 if (i != PC_REGNUM)
14417 tmp = gen_rtx_SET (VOIDmode,
14418 gen_frame_mem (SImode, stack_pointer_rtx),
14420 RTX_FRAME_RELATED_P (tmp) = 1;
14421 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14429 for (j = 1, i++; j < num_regs; i++)
14431 if (mask & (1 << i))
14433 reg = gen_rtx_REG (SImode, i);
14435 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14437 if (i != PC_REGNUM)
14440 = gen_rtx_SET (VOIDmode,
14443 plus_constant (stack_pointer_rtx,
14446 RTX_FRAME_RELATED_P (tmp) = 1;
14447 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14454 par = emit_insn (par);
14456 tmp = gen_rtx_SET (VOIDmode,
14457 stack_pointer_rtx,
14458 plus_constant (stack_pointer_rtx, -4 * num_regs));
14459 RTX_FRAME_RELATED_P (tmp) = 1;
14460 XVECEXP (dwarf, 0, 0) = tmp;
14462 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
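/* For instance, a MASK naming r4, r5 and LR yields the single
   instruction "stmfd sp!, {r4, r5, lr}" ("push {r4, r5, lr}" in
   unified syntax), while the attached note describes it to the
   unwinder as "sp = sp - 12" plus three individual word stores.  */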
14467 /* Calculate the size of the return value that is passed in registers. */
14468 static unsigned
14469 arm_size_return_regs (void)
14471 enum machine_mode mode;
14473 if (crtl->return_rtx != 0)
14474 mode = GET_MODE (crtl->return_rtx);
14476 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14478 return GET_MODE_SIZE (mode);
14481 static rtx
14482 emit_sfm (int base_reg, int count)
14489 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14490 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14492 reg = gen_rtx_REG (XFmode, base_reg++);
14494 XVECEXP (par, 0, 0)
14495 = gen_rtx_SET (VOIDmode,
14498 gen_rtx_PRE_MODIFY (Pmode,
14501 (stack_pointer_rtx,
14504 gen_rtx_UNSPEC (BLKmode,
14505 gen_rtvec (1, reg),
14506 UNSPEC_PUSH_MULT));
14507 tmp = gen_rtx_SET (VOIDmode,
14508 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14509 RTX_FRAME_RELATED_P (tmp) = 1;
14510 XVECEXP (dwarf, 0, 1) = tmp;
14512 for (i = 1; i < count; i++)
14514 reg = gen_rtx_REG (XFmode, base_reg++);
14515 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14517 tmp = gen_rtx_SET (VOIDmode,
14518 gen_frame_mem (XFmode,
14519 plus_constant (stack_pointer_rtx,
14522 RTX_FRAME_RELATED_P (tmp) = 1;
14523 XVECEXP (dwarf, 0, i + 1) = tmp;
14526 tmp = gen_rtx_SET (VOIDmode,
14527 stack_pointer_rtx,
14528 plus_constant (stack_pointer_rtx, -12 * count));
14530 RTX_FRAME_RELATED_P (tmp) = 1;
14531 XVECEXP (dwarf, 0, 0) = tmp;
14533 par = emit_insn (par);
14534 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14540 /* Return true if the current function needs to save/restore LR. */
14542 static bool
14543 thumb_force_lr_save (void)
14545 return !cfun->machine->lr_save_eliminated
14546 && (!leaf_function_p ()
14547 || thumb_far_jump_used_p ()
14548 || df_regs_ever_live_p (LR_REGNUM));
14552 /* Compute the distance from register FROM to register TO.
14553 These can be the arg pointer (26), the soft frame pointer (25),
14554 the stack pointer (13) or the hard frame pointer (11).
14555 In thumb mode r7 is used as the soft frame pointer, if needed.
14556    Typical stack layout looks like this:

14558        old stack pointer -> |    |
14559                              ----
14560                             |    | \
14561                             |    |   saved arguments for
14562                             |    |   vararg functions
14563                             |    | /
14564                               --
14565    hard FP & arg pointer -> |    | \
14566                             |    |   stack
14567                             |    |   frame
14568                             |    | /
14569                               --
14570                             |    | \
14571                             |    |   call saved
14572                             |    |   registers
14573       soft frame pointer -> |    | /
14574                               --
14575                             |    | \
14576                             |    |   local
14577                             |    |   variables
14578      locals base pointer -> |    | /
14579                               --
14580                             |    | \
14581                             |    |   outgoing
14582                             |    |   arguments
14583    current stack pointer -> |    | /
14584                               --
14586 For a given function some or all of these stack components
14587 may not be needed, giving rise to the possibility of
14588 eliminating some of the registers.
14590 The values returned by this function must reflect the behavior
14591 of arm_expand_prologue() and arm_compute_save_reg_mask().
14593 The sign of the number returned reflects the direction of stack
14594 growth, so the values are positive for all eliminations except
14595 from the soft frame pointer to the hard frame pointer.
14597 SFP may point just inside the local variables block to ensure correct
14598 alignment.  */
14601 /* Calculate stack offsets. These are used to calculate register elimination
14602 offsets and in prologue/epilogue code. Also calculates which registers
14603 should be saved. */
14605 static arm_stack_offsets *
14606 arm_get_frame_offsets (void)
14608 struct arm_stack_offsets *offsets;
14609 unsigned long func_type;
14613 HOST_WIDE_INT frame_size;
14616 offsets = &cfun->machine->stack_offsets;
14618 /* We need to know if we are a leaf function. Unfortunately, it
14619 is possible to be called after start_sequence has been called,
14620 which causes get_insns to return the insns for the sequence,
14621 not the function, which will cause leaf_function_p to return
14622 the incorrect result.  We only need
14624 to know about leaf functions once reload has completed, and the
14625 frame size cannot be changed after that time, so we can safely
14626 use the cached value. */
14628 if (reload_completed)
14629 return offsets;
14631 /* Initially this is the size of the local variables.  It will be
14632 translated into an offset once we have determined the size of preceding data.  */
14633 frame_size = ROUND_UP_WORD (get_frame_size ());
14635 leaf = leaf_function_p ();
14637 /* Space for variadic functions. */
14638 offsets->saved_args = crtl->args.pretend_args_size;
14640 /* In Thumb mode this is incorrect, but never used. */
14641 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14642 arm_compute_static_chain_stack_bytes();
14646 unsigned int regno;
14648 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14649 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14650 saved = core_saved;
14652 /* We know that SP will be doubleword aligned on entry, and we must
14653 preserve that condition at any subroutine call. We also require the
14654 soft frame pointer to be doubleword aligned. */
14656 if (TARGET_REALLY_IWMMXT)
14658 /* Check for the call-saved iWMMXt registers. */
14659 for (regno = FIRST_IWMMXT_REGNUM;
14660 regno <= LAST_IWMMXT_REGNUM;
14661 regno++)
14662 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14663 saved += 8;
14666 func_type = arm_current_func_type ();
14667 if (! IS_VOLATILE (func_type))
14669 /* Space for saved FPA registers. */
14670 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14671 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14672 saved += 12;
14674 /* Space for saved VFP registers. */
14675 if (TARGET_HARD_FLOAT && TARGET_VFP)
14676 saved += arm_get_vfp_saved_size ();
14679 else /* TARGET_THUMB1 */
14681 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14682 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14683 saved = core_saved;
14684 if (TARGET_BACKTRACE)
14685 saved += 16;
14688 /* Saved registers include the stack frame. */
14689 offsets->saved_regs = offsets->saved_args + saved +
14690 arm_compute_static_chain_stack_bytes();
14691 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14692 /* A leaf function does not need any stack alignment if it has nothing
14693 on the stack.  */
14694 if (leaf && frame_size == 0)
14696 offsets->outgoing_args = offsets->soft_frame;
14697 offsets->locals_base = offsets->soft_frame;
14701 /* Ensure SFP has the correct alignment. */
14702 if (ARM_DOUBLEWORD_ALIGN
14703 && (offsets->soft_frame & 7))
14705 offsets->soft_frame += 4;
14706 /* Try to align stack by pushing an extra reg. Don't bother doing this
14707 when there is a stack frame as the alignment will be rolled into
14708 the normal stack adjustment. */
14709 if (frame_size + crtl->outgoing_args_size == 0)
14713 /* If it is safe to use r3, then do so. This sometimes
14714 generates better code on Thumb-2 by avoiding the need to
14715 use 32-bit push/pop instructions. */
14716 if (!crtl->tail_call_emit
14717 && arm_size_return_regs () <= 12
14718 && (offsets->saved_regs_mask & (1 << 3)) == 0)
14723 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14725 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14734 offsets->saved_regs += 4;
14735 offsets->saved_regs_mask |= (1 << reg);
14740 offsets->locals_base = offsets->soft_frame + frame_size;
14741 offsets->outgoing_args = (offsets->locals_base
14742 + crtl->outgoing_args_size);
14744 if (ARM_DOUBLEWORD_ALIGN)
14746 /* Ensure SP remains doubleword aligned. */
14747 if (offsets->outgoing_args & 7)
14748 offsets->outgoing_args += 4;
14749 gcc_assert (!(offsets->outgoing_args & 7));
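/* A worked example under simplifying assumptions (no static chain
   bytes, a CALLER_INTERWORKING_SLOT_SIZE of zero, ARM_DOUBLEWORD_ALIGN
   in effect): a function saving {r4, r5, fp, lr} (16 bytes) with 12
   bytes of locals and no outgoing or pretend arguments gets
   saved_args = 0, saved_regs = 16, soft_frame = 16 (already doubleword
   aligned), locals_base = 28, and outgoing_args rounded from 28 up to
   32 so that SP leaves the prologue doubleword aligned.  */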
14756 /* Calculate the relative offsets for the different stack pointers. Positive
14757 offsets are in the direction of stack growth. */
14759 unsigned int
14760 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14762 arm_stack_offsets *offsets;
14764 offsets = arm_get_frame_offsets ();
14766 /* OK, now we have enough information to compute the distances.
14767 There must be an entry in these switch tables for each pair
14768 of registers in ELIMINABLE_REGS, even if some of the entries
14769 seem to be redundant or useless. */
14772 case ARG_POINTER_REGNUM:
14775 case THUMB_HARD_FRAME_POINTER_REGNUM:
14778 case FRAME_POINTER_REGNUM:
14779 /* This is the reverse of the soft frame pointer
14780 to hard frame pointer elimination below. */
14781 return offsets->soft_frame - offsets->saved_args;
14783 case ARM_HARD_FRAME_POINTER_REGNUM:
14784 /* This is only non-zero in the case where the static chain register
14785 is stored above the frame. */
14786 return offsets->frame - offsets->saved_args - 4;
14788 case STACK_POINTER_REGNUM:
14789 /* If nothing has been pushed on the stack at all
14790 then this will return -4. This *is* correct! */
14791 return offsets->outgoing_args - (offsets->saved_args + 4);
14794 gcc_unreachable ();
14796 gcc_unreachable ();
14798 case FRAME_POINTER_REGNUM:
14801 case THUMB_HARD_FRAME_POINTER_REGNUM:
14804 case ARM_HARD_FRAME_POINTER_REGNUM:
14805 /* The hard frame pointer points to the top entry in the
14806 stack frame. The soft frame pointer to the bottom entry
14807 in the stack frame. If there is no stack frame at all,
14808 then they are identical. */
14810 return offsets->frame - offsets->soft_frame;
14812 case STACK_POINTER_REGNUM:
14813 return offsets->outgoing_args - offsets->soft_frame;
14816 gcc_unreachable ();
14818 gcc_unreachable ();
14821 /* You cannot eliminate from the stack pointer.
14822 In theory you could eliminate from the hard frame
14823 pointer to the stack pointer, but this will never
14824 happen, since if a stack frame is not needed the
14825 hard frame pointer will never be used. */
14826 gcc_unreachable ();
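/* Reusing the worked example given after arm_get_frame_offsets above
   (saved_args = 0, soft_frame = 16, outgoing_args = 32): eliminating
   the arg pointer into the stack pointer yields 32 - (0 + 4) = 28,
   and eliminating the soft frame pointer into the stack pointer
   yields 32 - 16 = 16.  */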
14830 /* Given FROM and TO register numbers, say whether this elimination is
14831 allowed. Frame pointer elimination is automatically handled.
14833 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14834 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14835 pointer, we must eliminate FRAME_POINTER_REGNUM into
14836 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14837 ARG_POINTER_REGNUM. */
14840 arm_can_eliminate (const int from, const int to)
14842 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14843 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14844 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14845 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14849 /* Emit RTL to save coprocessor registers on function entry. Returns the
14850 number of bytes pushed. */
14853 arm_save_coproc_regs(void)
14855 int saved_size = 0;
14857 unsigned start_reg;
14860 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14861 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14863 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14864 insn = gen_rtx_MEM (V2SImode, insn);
14865 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14866 RTX_FRAME_RELATED_P (insn) = 1;
14870 /* Save any floating point call-saved registers used by this
14872 if (TARGET_FPA_EMU2)
14874 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14875 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14877 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14878 insn = gen_rtx_MEM (XFmode, insn);
14879 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14880 RTX_FRAME_RELATED_P (insn) = 1;
14886 start_reg = LAST_FPA_REGNUM;
14888 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14890 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14892 if (start_reg - reg == 3)
14894 insn = emit_sfm (reg, 4);
14895 RTX_FRAME_RELATED_P (insn) = 1;
14897 start_reg = reg - 1;
14902 if (start_reg != reg)
14904 insn = emit_sfm (reg + 1, start_reg - reg);
14905 RTX_FRAME_RELATED_P (insn) = 1;
14906 saved_size += (start_reg - reg) * 12;
14908 start_reg = reg - 1;
14912 if (start_reg != reg)
14914 insn = emit_sfm (reg + 1, start_reg - reg);
14915 saved_size += (start_reg - reg) * 12;
14916 RTX_FRAME_RELATED_P (insn) = 1;
14919 if (TARGET_HARD_FLOAT && TARGET_VFP)
14921 start_reg = FIRST_VFP_REGNUM;
14923 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14925 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14926 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14928 if (start_reg != reg)
14929 saved_size += vfp_emit_fstmd (start_reg,
14930 (reg - start_reg) / 2);
14931 start_reg = reg + 2;
14934 if (start_reg != reg)
14935 saved_size += vfp_emit_fstmd (start_reg,
14936 (reg - start_reg) / 2);
14942 /* Set the Thumb frame pointer from the stack pointer. */
14945 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14947 HOST_WIDE_INT amount;
14950 amount = offsets->outgoing_args - offsets->locals_base;
14952 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14953 stack_pointer_rtx, GEN_INT (amount)));
14956 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14957 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14958 expects the first two operands to be the same. */
14961 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14963 hard_frame_pointer_rtx));
14967 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14968 hard_frame_pointer_rtx,
14969 stack_pointer_rtx));
14971 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14972 plus_constant (stack_pointer_rtx, amount));
14973 RTX_FRAME_RELATED_P (dwarf) = 1;
14974 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14977 RTX_FRAME_RELATED_P (insn) = 1;
14980 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14982 void
14983 arm_expand_prologue (void)
14988 unsigned long live_regs_mask;
14989 unsigned long func_type;
14991 int saved_pretend_args = 0;
14992 int saved_regs = 0;
14993 unsigned HOST_WIDE_INT args_to_push;
14994 arm_stack_offsets *offsets;
14996 func_type = arm_current_func_type ();
14998 /* Naked functions don't have prologues. */
14999 if (IS_NAKED (func_type))
15002 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15003 args_to_push = crtl->args.pretend_args_size;
15005 /* Compute which register we will have to save onto the stack. */
15006 offsets = arm_get_frame_offsets ();
15007 live_regs_mask = offsets->saved_regs_mask;
15009 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15011 if (IS_STACKALIGN (func_type))
15016 /* Handle a word-aligned stack pointer.  We generate the following:

15018 mov r0, sp
15019 bic r1, r0, #7
15020 mov sp, r1
15021 <save and restore r0 in normal prologue/epilogue>
15022 mov sp, r0
15023 bx lr
15025 The unwinder doesn't need to know about the stack realignment.
15026 Just tell it we saved SP in r0. */
15027 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15029 r0 = gen_rtx_REG (SImode, 0);
15030 r1 = gen_rtx_REG (SImode, 1);
15031 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15032 compiler won't choke. */
15033 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15034 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15035 insn = gen_movsi (r0, stack_pointer_rtx);
15036 RTX_FRAME_RELATED_P (insn) = 1;
15037 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15039 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15040 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15043 /* For APCS frames, if IP register is clobbered
15044 when creating frame, save that register in a special
15046 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15048 if (IS_INTERRUPT (func_type))
15050 /* Interrupt functions must not corrupt any registers.
15051 Creating a frame pointer however, corrupts the IP
15052 register, so we must push it first. */
15053 insn = emit_multi_reg_push (1 << IP_REGNUM);
15055 /* Do not set RTX_FRAME_RELATED_P on this insn.
15056 The dwarf stack unwinding code only wants to see one
15057 stack decrement per function, and this is not it. If
15058 this instruction is labeled as being part of the frame
15059 creation sequence then dwarf2out_frame_debug_expr will
15060 die when it encounters the assignment of IP to FP
15061 later on, since the use of SP here establishes SP as
15062 the CFA register and not IP.
15064 Anyway this instruction is not really part of the stack
15065 frame creation although it is part of the prologue. */
15067 else if (IS_NESTED (func_type))
15069 /* The static chain register is the same as the IP register
15070 used as a scratch register during stack frame creation.
15071 To get around this, we need to find somewhere to store IP
15072 whilst the frame is being created.  We try the following
15073 places in order:
15075 1. The last argument register.
15076 2. A slot on the stack above the frame. (This only
15077 works if the function is not a varargs function).
15078 3. Register r3, after pushing the argument registers
15079 onto the stack.
15081 Note - we only need to tell the dwarf2 backend about the SP
15082 adjustment in the second variant; the static chain register
15083 doesn't need to be unwound, as it doesn't contain a value
15084 inherited from the caller. */
15086 if (df_regs_ever_live_p (3) == false)
15087 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15088 else if (args_to_push == 0)
15092 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15095 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15096 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15099 /* Just tell the dwarf backend that we adjusted SP. */
15100 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15101 plus_constant (stack_pointer_rtx,
15103 RTX_FRAME_RELATED_P (insn) = 1;
15104 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15108 /* Store the args on the stack. */
15109 if (cfun->machine->uses_anonymous_args)
15110 insn = emit_multi_reg_push
15111 ((0xf0 >> (args_to_push / 4)) & 0xf);
15114 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15115 GEN_INT (- args_to_push)));
15117 RTX_FRAME_RELATED_P (insn) = 1;
15119 saved_pretend_args = 1;
15120 fp_offset = args_to_push;
15123 /* Now reuse r3 to preserve IP. */
15124 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15128 insn = emit_set_insn (ip_rtx,
15129 plus_constant (stack_pointer_rtx, fp_offset));
15130 RTX_FRAME_RELATED_P (insn) = 1;
15135 /* Push the argument registers, or reserve space for them. */
15136 if (cfun->machine->uses_anonymous_args)
15137 insn = emit_multi_reg_push
15138 ((0xf0 >> (args_to_push / 4)) & 0xf);
15141 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15142 GEN_INT (- args_to_push)));
15143 RTX_FRAME_RELATED_P (insn) = 1;
15146 /* If this is an interrupt service routine, and the link register
15147 is going to be pushed, and we're not generating an extra
15148 push of IP (needed when a frame is needed and the frame layout is APCS),
15149 then subtracting four from LR now will mean that the function return
15150 can be done with a single instruction.  */
15151 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15152 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15153 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15154 && TARGET_ARM)
15156 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15158 emit_set_insn (lr, plus_constant (lr, -4));
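/* With LR pre-biased by -4 here, the handler can return by loading
   the stacked value straight into PC (e.g. "ldmfd sp!, {..., pc}^",
   where the trailing ^ also recovers the CPSR from the SPSR) instead
   of needing a separate "subs pc, lr, #4".  */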
15161 if (live_regs_mask)
15163 saved_regs += bit_count (live_regs_mask) * 4;
15164 if (optimize_size && !frame_pointer_needed
15165 && saved_regs == offsets->saved_regs - offsets->saved_args)
15167 /* If no coprocessor registers are being pushed and we don't have
15168 to worry about a frame pointer then push extra registers to
15169 create the stack frame.  This is done in a way that does not
15170 alter the frame layout, so is independent of the epilogue. */
15174 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15176 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15177 if (frame && n * 4 >= frame)
15180 live_regs_mask |= (1 << n) - 1;
15181 saved_regs += frame;
15184 insn = emit_multi_reg_push (live_regs_mask);
15185 RTX_FRAME_RELATED_P (insn) = 1;
15188 if (! IS_VOLATILE (func_type))
15189 saved_regs += arm_save_coproc_regs ();
15191 if (frame_pointer_needed && TARGET_ARM)
15193 /* Create the new frame pointer. */
15194 if (TARGET_APCS_FRAME)
15196 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15197 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15198 RTX_FRAME_RELATED_P (insn) = 1;
15200 if (IS_NESTED (func_type))
15202 /* Recover the static chain register. */
15203 if (!df_regs_ever_live_p (3)
15204 || saved_pretend_args)
15205 insn = gen_rtx_REG (SImode, 3);
15206 else /* if (crtl->args.pretend_args_size == 0) */
15208 insn = plus_constant (hard_frame_pointer_rtx, 4);
15209 insn = gen_frame_mem (SImode, insn);
15211 emit_set_insn (ip_rtx, insn);
15212 /* Add a USE to stop propagate_one_insn() from barfing. */
15213 emit_insn (gen_prologue_use (ip_rtx));
15218 insn = GEN_INT (saved_regs - 4);
15219 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15220 stack_pointer_rtx, insn));
15221 RTX_FRAME_RELATED_P (insn) = 1;
15225 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15227 /* This add can produce multiple insns for a large constant, so we
15228 need to get tricky. */
15229 rtx last = get_last_insn ();
15231 amount = GEN_INT (offsets->saved_args + saved_regs
15232 - offsets->outgoing_args);
15234 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15238 last = last ? NEXT_INSN (last) : get_insns ();
15239 RTX_FRAME_RELATED_P (last) = 1;
15241 while (last != insn);
15243 /* If the frame pointer is needed, emit a special barrier that
15244 will prevent the scheduler from moving stores to the frame
15245 before the stack adjustment. */
15246 if (frame_pointer_needed)
15247 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15248 hard_frame_pointer_rtx));
15252 if (frame_pointer_needed && TARGET_THUMB2)
15253 thumb_set_frame_pointer (offsets);
15255 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15257 unsigned long mask;
15259 mask = live_regs_mask;
15260 mask &= THUMB2_WORK_REGS;
15261 if (!IS_NESTED (func_type))
15262 mask |= (1 << IP_REGNUM);
15263 arm_load_pic_register (mask);
15266 /* If we are profiling, make sure no instructions are scheduled before
15267 the call to mcount. Similarly if the user has requested no
15268 scheduling in the prolog. Similarly if we want non-call exceptions
15269 using the EABI unwinder, to prevent faulting instructions from being
15270 swapped with a stack adjustment. */
15271 if (crtl->profile || !TARGET_SCHED_PROLOG
15272 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
15273 emit_insn (gen_blockage ());
15275 /* If the link register is being kept alive, with the return address in it,
15276 then make sure that it does not get reused by the ce2 pass. */
15277 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15278 cfun->machine->lr_save_eliminated = 1;
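/* Putting the pieces together, a typical APCS-frame prologue emitted
   by the code above looks like this (sizes illustrative only):

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>  */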
15281 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15283 arm_print_condition (FILE *stream)
15285 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15287 /* Branch conversion is not implemented for Thumb-2. */
15290 output_operand_lossage ("predicated Thumb instruction");
15293 if (current_insn_predicate != NULL)
15295 output_operand_lossage
15296 ("predicated instruction in conditional sequence");
15300 fputs (arm_condition_codes[arm_current_cc], stream);
15302 else if (current_insn_predicate)
15304 enum arm_cond_code code;
15308 output_operand_lossage ("predicated Thumb instruction");
15312 code = get_arm_condition_code (current_insn_predicate);
15313 fputs (arm_condition_codes[code], stream);
15318 /* If CODE is 'd', then the X is a condition operand and the instruction
15319 should only be executed if the condition is true.
15320 if CODE is 'D', then the X is a condition operand and the instruction
15321 should only be executed if the condition is false: however, if the mode
15322 of the comparison is CCFPEmode, then always execute the instruction -- we
15323 do this because in these circumstances !GE does not necessarily imply LT;
15324 in these cases the instruction pattern will take care to make sure that
15325 an instruction containing %d will follow, thereby undoing the effects of
15326 doing this instruction unconditionally.
15327 If CODE is 'N' then X is a floating point operand that must be negated
15328 before output.
15329 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15330 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15331 static void
15332 arm_print_operand (FILE *stream, rtx x, int code)
15337 fputs (ASM_COMMENT_START, stream);
15341 fputs (user_label_prefix, stream);
15345 fputs (REGISTER_PREFIX, stream);
15349 arm_print_condition (stream);
15353 /* Nothing in unified syntax, otherwise the current condition code. */
15354 if (!TARGET_UNIFIED_ASM)
15355 arm_print_condition (stream);
15359 /* The current condition code in unified syntax, otherwise nothing. */
15360 if (TARGET_UNIFIED_ASM)
15361 arm_print_condition (stream);
15365 /* The current condition code for a condition code setting instruction.
15366 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15367 if (TARGET_UNIFIED_ASM)
15369 fputc('s', stream);
15370 arm_print_condition (stream);
15374 arm_print_condition (stream);
15375 fputc('s', stream);
15380 /* If the instruction is conditionally executed then print
15381 the current condition code, otherwise print 's'. */
15382 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15383 if (current_insn_predicate)
15384 arm_print_condition (stream);
15386 fputc('s', stream);
15389 /* %# is a "break" sequence. It doesn't output anything, but is used to
15390 separate e.g. operand numbers from following text, if that text consists
15391 of further digits which we don't want to be part of the operand
15392 number.  */
15399 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15400 r = real_value_negate (&r);
15401 fprintf (stream, "%s", fp_const_from_val (&r));
15405 /* An integer or symbol address without a preceding # sign. */
15407 switch (GET_CODE (x))
15410 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15414 output_addr_const (stream, x);
15418 gcc_unreachable ();
15423 if (GET_CODE (x) == CONST_INT)
15426 val = ARM_SIGN_EXTEND (~INTVAL (x));
15427 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15431 putc ('~', stream);
15432 output_addr_const (stream, x);
15437 /* The low 16 bits of an immediate constant. */
15438 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15442 fprintf (stream, "%s", arithmetic_instr (x, 1));
15445 /* Truncate Cirrus shift counts. */
15447 if (GET_CODE (x) == CONST_INT)
15449 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15452 arm_print_operand (stream, x, 0);
15456 fprintf (stream, "%s", arithmetic_instr (x, 0));
15464 if (!shift_operator (x, SImode))
15466 output_operand_lossage ("invalid shift operand");
15470 shift = shift_op (x, &val);
15474 fprintf (stream, ", %s ", shift);
15476 arm_print_operand (stream, XEXP (x, 1), 0);
15478 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15483 /* An explanation of the 'Q', 'R' and 'H' register operands:
15485 In a pair of registers containing a DI or DF value the 'Q'
15486 operand returns the register number of the register containing
15487 the least significant part of the value. The 'R' operand returns
15488 the register number of the register containing the most
15489 significant part of the value.
15491 The 'H' operand returns the higher of the two register numbers.
15492 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15493 same as the 'Q' operand, since the most significant part of the
15494 value is held in the lower number register. The reverse is true
15495 on systems where WORDS_BIG_ENDIAN is false.
15497 The purpose of these operands is to distinguish between cases
15498 where the endian-ness of the values is important (for example
15499 when they are added together), and cases where the endian-ness
15500 is irrelevant, but the order of register operations is important.
15501 For example when loading a value from memory into a register
15502 pair, the endian-ness does not matter. Provided that the value
15503 from the lower memory address is put into the lower numbered
15504 register, and the value from the higher address is put into the
15505 higher numbered register, the load will work regardless of whether
15506 the value being loaded is big-wordian or little-wordian. The
15507 order of the two register loads can matter however, if the address
15508 of the memory location is actually held in one of the registers
15509 being overwritten by the load.
15511 The 'Q' and 'R' constraints are also available for 64-bit
15512 values.  */
15514 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15516 rtx part = gen_lowpart (SImode, x);
15517 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15521 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15523 output_operand_lossage ("invalid operand for code '%c'", code);
15527 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15531 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15533 enum machine_mode mode = GET_MODE (x);
15536 if (mode == VOIDmode)
15537 mode = DImode;
15538 part = gen_highpart_mode (SImode, mode, x);
15539 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15543 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15545 output_operand_lossage ("invalid operand for code '%c'", code);
15549 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15553 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15555 output_operand_lossage ("invalid operand for code '%c'", code);
15559 asm_fprintf (stream, "%r", REGNO (x) + 1);
15563 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15565 output_operand_lossage ("invalid operand for code '%c'", code);
15569 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15573 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15575 output_operand_lossage ("invalid operand for code '%c'", code);
15579 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15583 asm_fprintf (stream, "%r",
15584 GET_CODE (XEXP (x, 0)) == REG
15585 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15589 asm_fprintf (stream, "{%r-%r}",
15591 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
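/* For example, a DImode value held in r0 prints as "{r0-r1}", since
   ARM_NUM_REGS yields two words for that mode.  */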
15594 /* Like 'M', but writing doubleword vector registers, for use by Neon
15598 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15599 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15600 if (numregs == 1)
15601 asm_fprintf (stream, "{d%d}", regno);
15602 else
15603 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15608 /* CONST_TRUE_RTX means always -- that's the default. */
15609 if (x == const_true_rtx)
15612 if (!COMPARISON_P (x))
15614 output_operand_lossage ("invalid operand for code '%c'", code);
15618 fputs (arm_condition_codes[get_arm_condition_code (x)],
15623 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15624 want to do that. */
15625 if (x == const_true_rtx)
15627 output_operand_lossage ("instruction never executed");
15630 if (!COMPARISON_P (x))
15632 output_operand_lossage ("invalid operand for code '%c'", code);
15636 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15637 (get_arm_condition_code (x))],
15641 /* Cirrus registers can be accessed in a variety of ways:
15642 single floating point (f)
15643 double floating point (d)
15645 64bit integer (dx). */
15646 case 'W': /* Cirrus register in F mode. */
15647 case 'X': /* Cirrus register in D mode. */
15648 case 'Y': /* Cirrus register in FX mode. */
15649 case 'Z': /* Cirrus register in DX mode. */
15650 gcc_assert (GET_CODE (x) == REG
15651 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15653 fprintf (stream, "mv%s%s",
15655 : code == 'X' ? "d"
15656 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15660 /* Print cirrus register in the mode specified by the register's mode. */
15663 int mode = GET_MODE (x);
15665 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15667 output_operand_lossage ("invalid operand for code '%c'", code);
15671 fprintf (stream, "mv%s%s",
15672 mode == DFmode ? "d"
15673 : mode == SImode ? "fx"
15674 : mode == DImode ? "dx"
15675 : "f", reg_names[REGNO (x)] + 2);
15681 if (GET_CODE (x) != REG
15682 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15683 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15684 /* Bad value for wCG register number. */
15686 output_operand_lossage ("invalid operand for code '%c'", code);
15691 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15694 /* Print an iWMMXt control register name. */
15696 if (GET_CODE (x) != CONST_INT
15698 || INTVAL (x) >= 16)
15699 /* Bad value for wC register number. */
15701 output_operand_lossage ("invalid operand for code '%c'", code);
15707 static const char * wc_reg_names [16] =
15709 "wCID", "wCon", "wCSSF", "wCASF",
15710 "wC4", "wC5", "wC6", "wC7",
15711 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15712 "wC12", "wC13", "wC14", "wC15"
15715 fprintf (stream, wc_reg_names [INTVAL (x)]);
15719 /* Print the high single-precision register of a VFP double-precision
15723 int mode = GET_MODE (x);
15726 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15728 output_operand_lossage ("invalid operand for code '%c'", code);
15733 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15735 output_operand_lossage ("invalid operand for code '%c'", code);
15739 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15743 /* Print a VFP/Neon double precision or quad precision register name. */
15747 int mode = GET_MODE (x);
15748 int is_quad = (code == 'q');
15751 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15753 output_operand_lossage ("invalid operand for code '%c'", code);
15757 if (GET_CODE (x) != REG
15758 || !IS_VFP_REGNUM (REGNO (x)))
15760 output_operand_lossage ("invalid operand for code '%c'", code);
15765 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15766 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15768 output_operand_lossage ("invalid operand for code '%c'", code);
15772 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15773 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15777 /* These two codes print the low/high doubleword register of a Neon quad
15778 register, respectively. For pair-structure types, can also print
15779 low/high quadword registers. */
15783 int mode = GET_MODE (x);
15786 if ((GET_MODE_SIZE (mode) != 16
15787 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15789 output_operand_lossage ("invalid operand for code '%c'", code);
15794 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15796 output_operand_lossage ("invalid operand for code '%c'", code);
15800 if (GET_MODE_SIZE (mode) == 16)
15801 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15802 + (code == 'f' ? 1 : 0));
15804 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15805 + (code == 'f' ? 1 : 0));
15809 /* Print a VFPv3 floating-point constant, represented as an integer
15813 int index = vfp3_const_double_index (x);
15814 gcc_assert (index != -1);
15815 fprintf (stream, "%d", index);
15819 /* Print bits representing opcode features for Neon.
15821 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15822 and polynomials as unsigned.
15824 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15826 Bit 2 is 1 for rounding functions, 0 otherwise. */
15828 /* Identify the type as 's', 'u', 'p' or 'f'. */
15831 HOST_WIDE_INT bits = INTVAL (x);
15832 fputc ("uspf"[bits & 3], stream);
15836 /* Likewise, but signed and unsigned integers are both 'i'. */
15839 HOST_WIDE_INT bits = INTVAL (x);
15840 fputc ("iipf"[bits & 3], stream);
15844 /* As for 'T', but emit 'u' instead of 'p'. */
15847 HOST_WIDE_INT bits = INTVAL (x);
15848 fputc ("usuf"[bits & 3], stream);
15852 /* Bit 2: rounding (vs none). */
15855 HOST_WIDE_INT bits = INTVAL (x);
15856 fputs ((bits & 4) != 0 ? "r" : "", stream);
15860 /* Memory operand for vld1/vst1 instruction. */
15864 bool postinc = FALSE;
15865 gcc_assert (GET_CODE (x) == MEM);
15866 addr = XEXP (x, 0);
15867 if (GET_CODE (addr) == POST_INC)
15868 {
15869 postinc = 1;
15870 addr = XEXP (addr, 0);
15871 }
15872 asm_fprintf (stream, "[%r]", REGNO (addr));
15873 if (postinc)
15874 fputs("!", stream);
15878 /* Translate an S register number into a D register number and element index. */
15881 int mode = GET_MODE (x);
15884 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15886 output_operand_lossage ("invalid operand for code '%c'", code);
15891 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15893 output_operand_lossage ("invalid operand for code '%c'", code);
15897 regno = regno - FIRST_VFP_REGNUM;
15898 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
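/* For example, s0 prints as "d0[0]" and s1 as "d0[1]": each D
   register overlays two consecutive S registers.  */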
15902 /* Register specifier for vld1.16/vst1.16. Translate the S register
15903 number into a D register number and element index. */
15906 int mode = GET_MODE (x);
15909 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15911 output_operand_lossage ("invalid operand for code '%c'", code);
15916 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15918 output_operand_lossage ("invalid operand for code '%c'", code);
15922 regno = regno - FIRST_VFP_REGNUM;
15923 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15930 output_operand_lossage ("missing operand");
15934 switch (GET_CODE (x))
15937 asm_fprintf (stream, "%r", REGNO (x));
15941 output_memory_reference_mode = GET_MODE (x);
15942 output_address (XEXP (x, 0));
15949 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15950 sizeof (fpstr), 0, 1);
15951 fprintf (stream, "#%s", fpstr);
15954 fprintf (stream, "#%s", fp_immediate_constant (x));
15958 gcc_assert (GET_CODE (x) != NEG);
15959 fputc ('#', stream);
15960 if (GET_CODE (x) == HIGH)
15962 fputs (":lower16:", stream);
15966 output_addr_const (stream, x);
15972 /* Target hook for printing a memory address. */
15974 arm_print_operand_address (FILE *stream, rtx x)
15978 int is_minus = GET_CODE (x) == MINUS;
15980 if (GET_CODE (x) == REG)
15981 asm_fprintf (stream, "[%r, #0]", REGNO (x));
15982 else if (GET_CODE (x) == PLUS || is_minus)
15984 rtx base = XEXP (x, 0);
15985 rtx index = XEXP (x, 1);
15986 HOST_WIDE_INT offset = 0;
15987 if (GET_CODE (base) != REG
15988 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
15990 /* Ensure that BASE is a register (one of them must be),
15991 and that the SP is not used as an index register.  */
15997 switch (GET_CODE (index))
16000 offset = INTVAL (index);
16003 asm_fprintf (stream, "[%r, #%wd]",
16004 REGNO (base), offset);
16008 asm_fprintf (stream, "[%r, %s%r]",
16009 REGNO (base), is_minus ? "-" : "",
16019 asm_fprintf (stream, "[%r, %s%r",
16020 REGNO (base), is_minus ? "-" : "",
16021 REGNO (XEXP (index, 0)));
16022 arm_print_operand (stream, index, 'S');
16023 fputs ("]", stream);
16028 gcc_unreachable ();
16031 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16032 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16034 extern enum machine_mode output_memory_reference_mode;
16036 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16038 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16039 asm_fprintf (stream, "[%r, #%s%d]!",
16040 REGNO (XEXP (x, 0)),
16041 GET_CODE (x) == PRE_DEC ? "-" : "",
16042 GET_MODE_SIZE (output_memory_reference_mode));
16044 asm_fprintf (stream, "[%r], #%s%d",
16045 REGNO (XEXP (x, 0)),
16046 GET_CODE (x) == POST_DEC ? "-" : "",
16047 GET_MODE_SIZE (output_memory_reference_mode));
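/* For example (illustrative): an SImode (mem (pre_dec (reg sp)))
   prints as "[sp, #-4]!", and (mem (post_inc (reg r4))) prints as
   "[r4], #4"; the step is taken from the size of the access mode.  */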
16049 else if (GET_CODE (x) == PRE_MODIFY)
16051 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16052 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16053 asm_fprintf (stream, "#%wd]!",
16054 INTVAL (XEXP (XEXP (x, 1), 1)));
16056 asm_fprintf (stream, "%r]!",
16057 REGNO (XEXP (XEXP (x, 1), 1)));
16059 else if (GET_CODE (x) == POST_MODIFY)
16061 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16062 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16063 asm_fprintf (stream, "#%wd",
16064 INTVAL (XEXP (XEXP (x, 1), 1)));
16066 asm_fprintf (stream, "%r",
16067 REGNO (XEXP (XEXP (x, 1), 1)));
16069 else output_addr_const (stream, x);
16073 if (GET_CODE (x) == REG)
16074 asm_fprintf (stream, "[%r]", REGNO (x));
16075 else if (GET_CODE (x) == POST_INC)
16076 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16077 else if (GET_CODE (x) == PLUS)
16079 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16080 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16081 asm_fprintf (stream, "[%r, #%wd]",
16082 REGNO (XEXP (x, 0)),
16083 INTVAL (XEXP (x, 1)));
16085 asm_fprintf (stream, "[%r, %r]",
16086 REGNO (XEXP (x, 0)),
16087 REGNO (XEXP (x, 1)));
16090 output_addr_const (stream, x);
16094 /* Target hook for indicating whether a punctuation character for
16095 TARGET_PRINT_OPERAND is valid. */
16097 arm_print_operand_punct_valid_p (unsigned char code)
16099 return (code == '@' || code == '|' || code == '.'
16100 || code == '(' || code == ')' || code == '#'
16101 || (TARGET_32BIT && (code == '?'))
16102 || (TARGET_THUMB2 && (code == '!'))
16103 || (TARGET_THUMB && (code == '_')));
16106 /* Target hook for assembling integer objects. The ARM version needs to
16107 handle word-sized values specially. */
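/* For example (illustrative): with -fPIC, a constant-pool word
   referring to a non-local symbol is emitted as "\t.word\tsym(GOT)",
   while a local symbol gets "\t.word\tsym(GOTOFF)".  */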
16109 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16111 enum machine_mode mode;
16113 if (size == UNITS_PER_WORD && aligned_p)
16115 fputs ("\t.word\t", asm_out_file);
16116 output_addr_const (asm_out_file, x);
16118 /* Mark symbols as position independent. We only do this in the
16119 .text segment, not in the .data segment. */
16120 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16121 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16123 /* See legitimize_pic_address for an explanation of the
16124 TARGET_VXWORKS_RTP check. */
16125 if (TARGET_VXWORKS_RTP
16126 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16127 fputs ("(GOT)", asm_out_file);
16129 fputs ("(GOTOFF)", asm_out_file);
16131 fputc ('\n', asm_out_file);
16135 mode = GET_MODE (x);
16137 if (arm_vector_mode_supported_p (mode))
16141 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16143 units = CONST_VECTOR_NUNITS (x);
16144 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16146 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16147 for (i = 0; i < units; i++)
16149 rtx elt = CONST_VECTOR_ELT (x, i);
16150 assemble_integer
16151 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16154 for (i = 0; i < units; i++)
16156 rtx elt = CONST_VECTOR_ELT (x, i);
16157 REAL_VALUE_TYPE rval;
16159 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16161 assemble_real
16162 (rval, GET_MODE_INNER (mode),
16163 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16169 return default_assemble_integer (x, size, aligned_p);
16173 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16177 if (!TARGET_AAPCS_BASED)
16179 (is_ctor ?
16180 default_named_section_asm_out_constructor
16181 : default_named_section_asm_out_destructor) (symbol, priority);
16185 /* Put these in the .init_array section, using a special relocation. */
16186 if (priority != DEFAULT_INIT_PRIORITY)
16189 sprintf (buf, "%s.%.5u",
16190 is_ctor ? ".init_array" : ".fini_array",
16191 priority);
16192 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16199 switch_to_section (s);
16200 assemble_align (POINTER_SIZE);
16201 fputs ("\t.word\t", asm_out_file);
16202 output_addr_const (asm_out_file, symbol);
16203 fputs ("(target1)\n", asm_out_file);
16206 /* Add a function to the list of static constructors. */
16209 arm_elf_asm_constructor (rtx symbol, int priority)
16211 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16214 /* Add a function to the list of static destructors. */
16217 arm_elf_asm_destructor (rtx symbol, int priority)
16219 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16222 /* A finite state machine takes care of noticing whether or not instructions
16223 can be conditionally executed, and thus decreases execution time and code
16224 size by deleting branch instructions. The fsm is controlled by
16225 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16227 /* The states of the fsm controlling condition codes are:
16228 0: normal, do nothing special
16229 1: make ASM_OUTPUT_OPCODE not output this instruction
16230 2: make ASM_OUTPUT_OPCODE not output this instruction
16231 3: make instructions conditional
16232 4: make instructions conditional
16234 State transitions (state->state by whom under condition):
16235 0 -> 1 final_prescan_insn if the `target' is a label
16236 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16237 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16238 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16239 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16240 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16241 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16242 (the target insn is arm_target_insn).
16244 If the jump clobbers the conditions then we use states 2 and 4.
16246 A similar thing can be done with conditional return insns.
16248 XXX In case the `target' is an unconditional branch, this conditionalising
16249 of the instructions always reduces code size, but not always execution
16250 time. But then, I want to reduce the code size to somewhere near what
16251 /bin/cc produces. */
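/* For example (illustrative), the fsm allows a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   to be emitted instead as

	cmp	r0, #0
	addne	r1, r1, #1

   deleting the branch and conditionalising the skipped insn.  */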
16253 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16254 instructions. When a COND_EXEC instruction is seen the subsequent
16255 instructions are scanned so that multiple conditional instructions can be
16256 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16257 specify the length and true/false mask for the IT block. These will be
16258 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
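/* For example (illustrative): three consecutive COND_EXEC insns
   predicated EQ, EQ, NE leave arm_condexec_masklen == 3 and
   arm_condexec_mask == 0x3, which thumb2_asm_output_opcode prints
   as "itte eq" ahead of the first insn of the block.  */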
16260 /* Returns the index of the ARM condition code string in
16261 `arm_condition_codes'. COMPARISON should be an rtx like
16262 `(eq (...) (...))'. */
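/* For example (illustrative): a COMPARISON of
   (eq (reg:CC CC_REGNUM) (const_int 0)) returns ARM_EQ, and the
   corresponding (ne ...) returns ARM_NE.  */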
16263 static enum arm_cond_code
16264 get_arm_condition_code (rtx comparison)
16266 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16267 enum arm_cond_code code;
16268 enum rtx_code comp_code = GET_CODE (comparison);
16270 if (GET_MODE_CLASS (mode) != MODE_CC)
16271 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16272 XEXP (comparison, 1));
16276 case CC_DNEmode: code = ARM_NE; goto dominance;
16277 case CC_DEQmode: code = ARM_EQ; goto dominance;
16278 case CC_DGEmode: code = ARM_GE; goto dominance;
16279 case CC_DGTmode: code = ARM_GT; goto dominance;
16280 case CC_DLEmode: code = ARM_LE; goto dominance;
16281 case CC_DLTmode: code = ARM_LT; goto dominance;
16282 case CC_DGEUmode: code = ARM_CS; goto dominance;
16283 case CC_DGTUmode: code = ARM_HI; goto dominance;
16284 case CC_DLEUmode: code = ARM_LS; goto dominance;
16285 case CC_DLTUmode: code = ARM_CC;
16288 gcc_assert (comp_code == EQ || comp_code == NE);
16290 if (comp_code == EQ)
16291 return ARM_INVERSE_CONDITION_CODE (code);
16297 case NE: return ARM_NE;
16298 case EQ: return ARM_EQ;
16299 case GE: return ARM_PL;
16300 case LT: return ARM_MI;
16301 default: gcc_unreachable ();
16307 case NE: return ARM_NE;
16308 case EQ: return ARM_EQ;
16309 default: gcc_unreachable ();
16315 case NE: return ARM_MI;
16316 case EQ: return ARM_PL;
16317 default: gcc_unreachable ();
16322 /* These encodings assume that AC=1 in the FPA system control
16323 byte.  This allows us to handle all cases except UNEQ and LTGT.  */
16327 case GE: return ARM_GE;
16328 case GT: return ARM_GT;
16329 case LE: return ARM_LS;
16330 case LT: return ARM_MI;
16331 case NE: return ARM_NE;
16332 case EQ: return ARM_EQ;
16333 case ORDERED: return ARM_VC;
16334 case UNORDERED: return ARM_VS;
16335 case UNLT: return ARM_LT;
16336 case UNLE: return ARM_LE;
16337 case UNGT: return ARM_HI;
16338 case UNGE: return ARM_PL;
16339 /* UNEQ and LTGT do not have a representation. */
16340 case UNEQ: /* Fall through. */
16341 case LTGT: /* Fall through. */
16342 default: gcc_unreachable ();
16348 case NE: return ARM_NE;
16349 case EQ: return ARM_EQ;
16350 case GE: return ARM_LE;
16351 case GT: return ARM_LT;
16352 case LE: return ARM_GE;
16353 case LT: return ARM_GT;
16354 case GEU: return ARM_LS;
16355 case GTU: return ARM_CC;
16356 case LEU: return ARM_CS;
16357 case LTU: return ARM_HI;
16358 default: gcc_unreachable ();
16364 case LTU: return ARM_CS;
16365 case GEU: return ARM_CC;
16366 default: gcc_unreachable ();
16372 case NE: return ARM_NE;
16373 case EQ: return ARM_EQ;
16374 case GEU: return ARM_CS;
16375 case GTU: return ARM_HI;
16376 case LEU: return ARM_LS;
16377 case LTU: return ARM_CC;
16378 default: gcc_unreachable ();
16384 case GE: return ARM_GE;
16385 case LT: return ARM_LT;
16386 case GEU: return ARM_CS;
16387 case LTU: return ARM_CC;
16388 default: gcc_unreachable ();
16394 case NE: return ARM_NE;
16395 case EQ: return ARM_EQ;
16396 case GE: return ARM_GE;
16397 case GT: return ARM_GT;
16398 case LE: return ARM_LE;
16399 case LT: return ARM_LT;
16400 case GEU: return ARM_CS;
16401 case GTU: return ARM_HI;
16402 case LEU: return ARM_LS;
16403 case LTU: return ARM_CC;
16404 default: gcc_unreachable ();
16407 default: gcc_unreachable ();
16411 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions.  */
16414 thumb2_final_prescan_insn (rtx insn)
16416 rtx first_insn = insn;
16417 rtx body = PATTERN (insn);
16419 enum arm_cond_code code;
16423 /* Remove the previous insn from the count of insns to be output. */
16424 if (arm_condexec_count)
16425 arm_condexec_count--;
16427 /* Nothing to do if we are already inside a conditional block. */
16428 if (arm_condexec_count)
16431 if (GET_CODE (body) != COND_EXEC)
16434 /* Conditional jumps are implemented directly. */
16435 if (GET_CODE (insn) == JUMP_INSN)
16438 predicate = COND_EXEC_TEST (body);
16439 arm_current_cc = get_arm_condition_code (predicate);
16441 n = get_attr_ce_count (insn);
16442 arm_condexec_count = 1;
16443 arm_condexec_mask = (1 << n) - 1;
16444 arm_condexec_masklen = n;
16445 /* See if subsequent instructions can be combined into the same block. */
16448 insn = next_nonnote_insn (insn);
16450 /* Jumping into the middle of an IT block is illegal, so a label or
16451 barrier terminates the block. */
16452 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
16455 body = PATTERN (insn);
16456 /* USE and CLOBBER aren't really insns, so just skip them. */
16457 if (GET_CODE (body) == USE
16458 || GET_CODE (body) == CLOBBER)
16461 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16462 if (GET_CODE (body) != COND_EXEC)
16464 /* Allow up to 4 conditionally executed instructions in a block. */
16465 n = get_attr_ce_count (insn);
16466 if (arm_condexec_masklen + n > 4)
16469 predicate = COND_EXEC_TEST (body);
16470 code = get_arm_condition_code (predicate);
16471 mask = (1 << n) - 1;
16472 if (arm_current_cc == code)
16473 arm_condexec_mask |= (mask << arm_condexec_masklen);
16474 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16477 arm_condexec_count++;
16478 arm_condexec_masklen += n;
16480 /* A jump must be the last instruction in a conditional block. */
16481 if (GET_CODE (insn) == JUMP_INSN)
16484 /* Restore recog_data (getting the attributes of other insns can
16485 destroy this array, but final.c assumes that it remains intact
16486 across this call).  */
16487 extract_constrain_insn_cached (first_insn);
16491 arm_final_prescan_insn (rtx insn)
16493 /* BODY will hold the body of INSN. */
16494 rtx body = PATTERN (insn);
16496 /* This will be 1 if trying to repeat the trick, and things need to be
16497 reversed if it appears to fail. */
16500 /* If we start with a return insn, we only succeed if we find another one. */
16501 int seeking_return = 0;
16503 /* START_INSN will hold the insn from where we start looking. This is the
16504 first insn after the following code_label if REVERSE is true. */
16505 rtx start_insn = insn;
16507 /* If in state 4, check if the target branch is reached, in order to
16508 change back to state 0. */
16509 if (arm_ccfsm_state == 4)
16511 if (insn == arm_target_insn)
16513 arm_target_insn = NULL;
16514 arm_ccfsm_state = 0;
16519 /* If in state 3, it is possible to repeat the trick, if this insn is an
16520 unconditional branch to a label, and immediately following this branch
16521 is the previous target label which is only used once, and the label this
16522 branch jumps to is not too far off. */
16523 if (arm_ccfsm_state == 3)
16525 if (simplejump_p (insn))
16527 start_insn = next_nonnote_insn (start_insn);
16528 if (GET_CODE (start_insn) == BARRIER)
16530 /* XXX Isn't this always a barrier? */
16531 start_insn = next_nonnote_insn (start_insn);
16533 if (GET_CODE (start_insn) == CODE_LABEL
16534 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16535 && LABEL_NUSES (start_insn) == 1)
16540 else if (GET_CODE (body) == RETURN)
16542 start_insn = next_nonnote_insn (start_insn);
16543 if (GET_CODE (start_insn) == BARRIER)
16544 start_insn = next_nonnote_insn (start_insn);
16545 if (GET_CODE (start_insn) == CODE_LABEL
16546 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16547 && LABEL_NUSES (start_insn) == 1)
16550 seeking_return = 1;
16559 gcc_assert (!arm_ccfsm_state || reverse);
16560 if (GET_CODE (insn) != JUMP_INSN)
16563 /* This jump might be paralleled with a clobber of the condition codes;
16564 the jump should always come first.  */
16565 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16566 body = XVECEXP (body, 0, 0);
16569 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16570 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16573 int fail = FALSE, succeed = FALSE;
16574 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16575 int then_not_else = TRUE;
16576 rtx this_insn = start_insn, label = 0;
16578 /* Register the insn jumped to. */
16581 if (!seeking_return)
16582 label = XEXP (SET_SRC (body), 0);
16584 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16585 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16586 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16588 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16589 then_not_else = FALSE;
16591 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16592 seeking_return = 1;
16593 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16595 seeking_return = 1;
16596 then_not_else = FALSE;
16599 gcc_unreachable ();
16601 /* See how many insns this branch skips, and what kind of insns. If all
16602 insns are okay, and the label or unconditional branch to the same
16603 label is not too far away, succeed. */
16604 for (insns_skipped = 0;
16605 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16609 this_insn = next_nonnote_insn (this_insn);
16613 switch (GET_CODE (this_insn))
16616 /* Succeed if it is the target label, otherwise fail since
16617 control falls in from somewhere else. */
16618 if (this_insn == label)
16620 arm_ccfsm_state = 1;
16628 /* Succeed if the following insn is the target label.
16629 Otherwise fail.
16630 If return insns are used then the last insn in a function
16631 will be a barrier. */
16632 this_insn = next_nonnote_insn (this_insn);
16633 if (this_insn && this_insn == label)
16635 arm_ccfsm_state = 1;
16643 /* The AAPCS says that conditional calls should not be
16644 used since they make interworking inefficient (the
16645 linker can't transform BL<cond> into BLX). That's
16646 only a problem if the machine has BLX. */
16653 /* Succeed if the following insn is the target label, or
16654 if the following two insns are a barrier and the target label.  */
16656 this_insn = next_nonnote_insn (this_insn);
16657 if (this_insn && GET_CODE (this_insn) == BARRIER)
16658 this_insn = next_nonnote_insn (this_insn);
16660 if (this_insn && this_insn == label
16661 && insns_skipped < max_insns_skipped)
16663 arm_ccfsm_state = 1;
16671 /* If this is an unconditional branch to the same label, succeed.
16672 If it is to another label, do nothing.  If it is conditional,
16673 fail.  */
16674 /* XXX Probably, the tests for SET and the PC are unnecessary.  */
16677 scanbody = PATTERN (this_insn);
16678 if (GET_CODE (scanbody) == SET
16679 && GET_CODE (SET_DEST (scanbody)) == PC)
16681 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16682 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16684 arm_ccfsm_state = 2;
16687 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16690 /* Fail if a conditional return is undesirable (e.g. on a
16691 StrongARM), but still allow this if optimizing for size. */
16692 else if (GET_CODE (scanbody) == RETURN
16693 && !use_return_insn (TRUE, NULL)
16696 else if (GET_CODE (scanbody) == RETURN
16699 arm_ccfsm_state = 2;
16702 else if (GET_CODE (scanbody) == PARALLEL)
16704 switch (get_attr_conds (this_insn))
16714 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16719 /* Instructions using or affecting the condition codes make it fail.  */
16721 scanbody = PATTERN (this_insn);
16722 if (!(GET_CODE (scanbody) == SET
16723 || GET_CODE (scanbody) == PARALLEL)
16724 || get_attr_conds (this_insn) != CONDS_NOCOND)
16727 /* A conditional Cirrus instruction must be followed by a
16728 non-Cirrus instruction.  However, since we conditionalize
16729 instructions in this function, and since by the time we get
16730 here we can no longer add instructions (nops, because
16731 shorten_branches() has already been called), we disable
16732 conditionalizing Cirrus instructions to be safe.  */
16734 if (GET_CODE (scanbody) != USE
16735 && GET_CODE (scanbody) != CLOBBER
16736 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16746 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16747 arm_target_label = CODE_LABEL_NUMBER (label);
16750 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16752 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16754 this_insn = next_nonnote_insn (this_insn);
16755 gcc_assert (!this_insn
16756 || (GET_CODE (this_insn) != BARRIER
16757 && GET_CODE (this_insn) != CODE_LABEL));
16761 /* Oh dear!  We ran off the end; give up.  */
16762 extract_constrain_insn_cached (insn);
16763 arm_ccfsm_state = 0;
16764 arm_target_insn = NULL;
16767 arm_target_insn = this_insn;
16770 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was.  */
16773 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16775 if (reverse || then_not_else)
16776 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16779 /* Restore recog_data (getting the attributes of other insns can
16780 destroy this array, but final.c assumes that it remains intact
16781 across this call).  */
16782 extract_constrain_insn_cached (insn);
16786 /* Output IT instructions. */
16788 thumb2_asm_output_opcode (FILE * stream)
16793 if (arm_condexec_mask)
16795 for (n = 0; n < arm_condexec_masklen; n++)
16796 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16798 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16799 arm_condition_codes[arm_current_cc]);
16800 arm_condexec_mask = 0;
16804 /* Returns true if REGNO is a valid register
16805 for holding a quantity of type MODE. */
16807 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16809 if (GET_MODE_CLASS (mode) == MODE_CC)
16810 return (regno == CC_REGNUM
16811 || (TARGET_HARD_FLOAT && TARGET_VFP
16812 && regno == VFPCC_REGNUM));
16815 /* For the Thumb we only allow values bigger than SImode in
16816 registers 0 - 6, so that there is always a second low
16817 register available to hold the upper part of the value.
16818 We probably ought to ensure that the register is the
16819 start of an even numbered register pair. */
16820 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16822 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16823 && IS_CIRRUS_REGNUM (regno))
16824 /* We have outlawed SI values in Cirrus registers because they
16825 reside in the lower 32 bits, but SF values reside in the
16826 upper 32 bits. This causes gcc all sorts of grief. We can't
16827 even split the registers into pairs because Cirrus SI values
16828 get sign extended to 64 bits.  -- aldyh.  */
16829 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16831 if (TARGET_HARD_FLOAT && TARGET_VFP
16832 && IS_VFP_REGNUM (regno))
16834 if (mode == SFmode || mode == SImode)
16835 return VFP_REGNO_OK_FOR_SINGLE (regno);
16837 if (mode == DFmode)
16838 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16840 /* VFP registers can hold HFmode values, but there is no point in
16841 putting them there unless we have hardware conversion insns. */
16842 if (mode == HFmode)
16843 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16846 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16847 || (VALID_NEON_QREG_MODE (mode)
16848 && NEON_REGNO_OK_FOR_QUAD (regno))
16849 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16850 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16851 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16852 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16853 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16858 if (TARGET_REALLY_IWMMXT)
16860 if (IS_IWMMXT_GR_REGNUM (regno))
16861 return mode == SImode;
16863 if (IS_IWMMXT_REGNUM (regno))
16864 return VALID_IWMMXT_REG_MODE (mode);
16867 /* We allow almost any value to be stored in the general registers.
16868 Restrict doubleword quantities to even register pairs so that we can
16869 use ldrd. Do not allow very large Neon structure opaque modes in
16870 general registers; they would use too many. */
16871 if (regno <= LAST_ARM_REGNUM)
16872 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16873 && ARM_NUM_REGS (mode) <= 4;
16875 if (regno == FRAME_POINTER_REGNUM
16876 || regno == ARG_POINTER_REGNUM)
16877 /* We only allow integers in the fake hard registers. */
16878 return GET_MODE_CLASS (mode) == MODE_INT;
16880 /* The only registers left are the FPA registers
16881 which we only allow to hold FP values. */
16882 return (TARGET_HARD_FLOAT && TARGET_FPA
16883 && GET_MODE_CLASS (mode) == MODE_FLOAT
16884 && regno >= FIRST_FPA_REGNUM
16885 && regno <= LAST_FPA_REGNUM);
16888 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16889 not used in ARM mode.  */
16892 arm_regno_class (int regno)
16896 if (regno == STACK_POINTER_REGNUM)
16898 if (regno == CC_REGNUM)
16905 if (TARGET_THUMB2 && regno < 8)
16908 if ( regno <= LAST_ARM_REGNUM
16909 || regno == FRAME_POINTER_REGNUM
16910 || regno == ARG_POINTER_REGNUM)
16911 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16913 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16914 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16916 if (IS_CIRRUS_REGNUM (regno))
16917 return CIRRUS_REGS;
16919 if (IS_VFP_REGNUM (regno))
16921 if (regno <= D7_VFP_REGNUM)
16922 return VFP_D0_D7_REGS;
16923 else if (regno <= LAST_LO_VFP_REGNUM)
16924 return VFP_LO_REGS;
16926 return VFP_HI_REGS;
16929 if (IS_IWMMXT_REGNUM (regno))
16930 return IWMMXT_REGS;
16932 if (IS_IWMMXT_GR_REGNUM (regno))
16933 return IWMMXT_GR_REGS;
16938 /* Handle a special case when computing the offset
16939 of an argument from the frame pointer. */
16941 arm_debugger_arg_offset (int value, rtx addr)
16945 /* We are only interested if dbxout_parms() failed to compute the offset. */
16949 /* We can only cope with the case where the address is held in a register. */
16950 if (GET_CODE (addr) != REG)
16953 /* If we are using the frame pointer to point at the argument, then
16954 an offset of 0 is correct. */
16955 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16958 /* If we are using the stack pointer to point at the
16959 argument, then an offset of 0 is correct. */
16960 /* ??? Check this is consistent with thumb2 frame layout. */
16961 if ((TARGET_THUMB || !frame_pointer_needed)
16962 && REGNO (addr) == SP_REGNUM)
16965 /* Oh dear. The argument is pointed to by a register rather
16966 than being held in a register, or being stored at a known
16967 offset from the frame pointer. Since GDB only understands
16968 those two kinds of argument we must translate the address
16969 held in the register into an offset from the frame pointer.
16970 We do this by searching through the insns for the function
16971 looking to see where this register gets its value. If the
16972 register is initialized from the frame pointer plus an offset
16973 then we are in luck and we can continue, otherwise we give up.
16975 This code is exercised by producing debugging information
16976 for a function with arguments like this:
16978 double func (double a, double b, int c, double d) {return d;}
16980 Without this code the stab for parameter 'd' will be set to
16981 an offset of 0 from the frame pointer, rather than 8. */
16983 /* The if() statement says:
16985 If the insn is a normal instruction
16986 and if the insn is setting the value in a register
16987 and if the register being set is the register holding the address of the argument
16988 and if the address is computed by an addition
16989 that involves adding to a register
16990 which is the frame pointer
16991 a constant integer,
16993 then...  */
16995 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16997 if ( GET_CODE (insn) == INSN
16998 && GET_CODE (PATTERN (insn)) == SET
16999 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17000 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17001 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17002 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17003 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17006 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17015 warning (0, "unable to compute real location of stacked parameter");
17016 value = 8; /* XXX magic hack */
17022 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17025 if ((MASK) & insn_flags) \
17026 add_builtin_function ((NAME), (TYPE), (CODE), \
17027 BUILT_IN_MD, NULL, NULL_TREE); \
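/* For example, the call
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);
   (made below) registers __builtin_arm_wzero only when insn_flags
   includes FL_IWMMXT.  */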
17031 struct builtin_description
17033 const unsigned int mask;
17034 const enum insn_code icode;
17035 const char * const name;
17036 const enum arm_builtins code;
17037 const enum rtx_code comparison;
17038 const unsigned int flag;
17041 static const struct builtin_description bdesc_2arg[] =
17043 #define IWMMXT_BUILTIN(code, string, builtin) \
17044 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17045 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
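/* For example, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */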
17047 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17048 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17049 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17050 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17051 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17052 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17053 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17054 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17055 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17056 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17057 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17058 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17059 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17060 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17061 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17062 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17063 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17064 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17065 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17066 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17067 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17068 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17069 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17070 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17071 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17072 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17073 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17074 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17075 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17076 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17077 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17078 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17079 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17080 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17081 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17082 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17083 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17084 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17085 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17086 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17087 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17088 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17089 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17090 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17091 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17092 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17093 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17094 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17095 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17096 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17097 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17098 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17099 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17100 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17101 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17102 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17103 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17104 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17106 #define IWMMXT_BUILTIN2(code, builtin) \
17107 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17109 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17110 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17111 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17112 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17113 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17114 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17115 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17116 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17117 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17118 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17119 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17120 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17121 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17122 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17123 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17124 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17125 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17126 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17127 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17128 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17129 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17130 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17131 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17132 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17133 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17134 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17135 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17136 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17137 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17138 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17139 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17140 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17143 static const struct builtin_description bdesc_1arg[] =
17145 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17146 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17147 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17148 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17149 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17150 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17151 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17152 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17153 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17154 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17155 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17156 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17157 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17158 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17159 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17160 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17161 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17162 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17165 /* Set up all the iWMMXt builtins. This is
17166 not called if TARGET_IWMMXT is zero. */
17169 arm_init_iwmmxt_builtins (void)
17171 const struct builtin_description * d;
17173 tree endlink = void_list_node;
17175 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17176 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17177 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17179 tree int_ftype_int
17180 = build_function_type (integer_type_node,
17181 tree_cons (NULL_TREE, integer_type_node, endlink));
17182 tree v8qi_ftype_v8qi_v8qi_int
17183 = build_function_type (V8QI_type_node,
17184 tree_cons (NULL_TREE, V8QI_type_node,
17185 tree_cons (NULL_TREE, V8QI_type_node,
17186 tree_cons (NULL_TREE,
17189 tree v4hi_ftype_v4hi_int
17190 = build_function_type (V4HI_type_node,
17191 tree_cons (NULL_TREE, V4HI_type_node,
17192 tree_cons (NULL_TREE, integer_type_node,
17194 tree v2si_ftype_v2si_int
17195 = build_function_type (V2SI_type_node,
17196 tree_cons (NULL_TREE, V2SI_type_node,
17197 tree_cons (NULL_TREE, integer_type_node,
17199 tree v2si_ftype_di_di
17200 = build_function_type (V2SI_type_node,
17201 tree_cons (NULL_TREE, long_long_integer_type_node,
17202 tree_cons (NULL_TREE, long_long_integer_type_node,
17204 tree di_ftype_di_int
17205 = build_function_type (long_long_integer_type_node,
17206 tree_cons (NULL_TREE, long_long_integer_type_node,
17207 tree_cons (NULL_TREE, integer_type_node,
17209 tree di_ftype_di_int_int
17210 = build_function_type (long_long_integer_type_node,
17211 tree_cons (NULL_TREE, long_long_integer_type_node,
17212 tree_cons (NULL_TREE, integer_type_node,
17213 tree_cons (NULL_TREE,
17216 tree int_ftype_v8qi
17217 = build_function_type (integer_type_node,
17218 tree_cons (NULL_TREE, V8QI_type_node,
17220 tree int_ftype_v4hi
17221 = build_function_type (integer_type_node,
17222 tree_cons (NULL_TREE, V4HI_type_node,
17224 tree int_ftype_v2si
17225 = build_function_type (integer_type_node,
17226 tree_cons (NULL_TREE, V2SI_type_node,
17228 tree int_ftype_v8qi_int
17229 = build_function_type (integer_type_node,
17230 tree_cons (NULL_TREE, V8QI_type_node,
17231 tree_cons (NULL_TREE, integer_type_node,
17233 tree int_ftype_v4hi_int
17234 = build_function_type (integer_type_node,
17235 tree_cons (NULL_TREE, V4HI_type_node,
17236 tree_cons (NULL_TREE, integer_type_node,
17238 tree int_ftype_v2si_int
17239 = build_function_type (integer_type_node,
17240 tree_cons (NULL_TREE, V2SI_type_node,
17241 tree_cons (NULL_TREE, integer_type_node,
17243 tree v8qi_ftype_v8qi_int_int
17244 = build_function_type (V8QI_type_node,
17245 tree_cons (NULL_TREE, V8QI_type_node,
17246 tree_cons (NULL_TREE, integer_type_node,
17247 tree_cons (NULL_TREE,
17250 tree v4hi_ftype_v4hi_int_int
17251 = build_function_type (V4HI_type_node,
17252 tree_cons (NULL_TREE, V4HI_type_node,
17253 tree_cons (NULL_TREE, integer_type_node,
17254 tree_cons (NULL_TREE,
17257 tree v2si_ftype_v2si_int_int
17258 = build_function_type (V2SI_type_node,
17259 tree_cons (NULL_TREE, V2SI_type_node,
17260 tree_cons (NULL_TREE, integer_type_node,
17261 tree_cons (NULL_TREE,
17264 /* Miscellaneous. */
17265 tree v8qi_ftype_v4hi_v4hi
17266 = build_function_type (V8QI_type_node,
17267 tree_cons (NULL_TREE, V4HI_type_node,
17268 tree_cons (NULL_TREE, V4HI_type_node,
17270 tree v4hi_ftype_v2si_v2si
17271 = build_function_type (V4HI_type_node,
17272 tree_cons (NULL_TREE, V2SI_type_node,
17273 tree_cons (NULL_TREE, V2SI_type_node,
17275 tree v2si_ftype_v4hi_v4hi
17276 = build_function_type (V2SI_type_node,
17277 tree_cons (NULL_TREE, V4HI_type_node,
17278 tree_cons (NULL_TREE, V4HI_type_node,
17280 tree v2si_ftype_v8qi_v8qi
17281 = build_function_type (V2SI_type_node,
17282 tree_cons (NULL_TREE, V8QI_type_node,
17283 tree_cons (NULL_TREE, V8QI_type_node,
17285 tree v4hi_ftype_v4hi_di
17286 = build_function_type (V4HI_type_node,
17287 tree_cons (NULL_TREE, V4HI_type_node,
17288 tree_cons (NULL_TREE,
17289 long_long_integer_type_node,
17291 tree v2si_ftype_v2si_di
17292 = build_function_type (V2SI_type_node,
17293 tree_cons (NULL_TREE, V2SI_type_node,
17294 tree_cons (NULL_TREE,
17295 long_long_integer_type_node,
17297 tree void_ftype_int_int
17298 = build_function_type (void_type_node,
17299 tree_cons (NULL_TREE, integer_type_node,
17300 tree_cons (NULL_TREE, integer_type_node,
17302 tree di_ftype_void
17303 = build_function_type (long_long_unsigned_type_node, endlink);
17304 tree di_ftype_v8qi
17305 = build_function_type (long_long_integer_type_node,
17306 tree_cons (NULL_TREE, V8QI_type_node,
17308 tree di_ftype_v4hi
17309 = build_function_type (long_long_integer_type_node,
17310 tree_cons (NULL_TREE, V4HI_type_node,
17312 tree di_ftype_v2si
17313 = build_function_type (long_long_integer_type_node,
17314 tree_cons (NULL_TREE, V2SI_type_node,
17316 tree v2si_ftype_v4hi
17317 = build_function_type (V2SI_type_node,
17318 tree_cons (NULL_TREE, V4HI_type_node,
17320 tree v4hi_ftype_v8qi
17321 = build_function_type (V4HI_type_node,
17322 tree_cons (NULL_TREE, V8QI_type_node,
17325 tree di_ftype_di_v4hi_v4hi
17326 = build_function_type (long_long_unsigned_type_node,
17327 tree_cons (NULL_TREE,
17328 long_long_unsigned_type_node,
17329 tree_cons (NULL_TREE, V4HI_type_node,
17330 tree_cons (NULL_TREE,
17334 tree di_ftype_v4hi_v4hi
17335 = build_function_type (long_long_unsigned_type_node,
17336 tree_cons (NULL_TREE, V4HI_type_node,
17337 tree_cons (NULL_TREE, V4HI_type_node,
17340 /* Normal vector binops. */
17341 tree v8qi_ftype_v8qi_v8qi
17342 = build_function_type (V8QI_type_node,
17343 tree_cons (NULL_TREE, V8QI_type_node,
17344 tree_cons (NULL_TREE, V8QI_type_node,
17346 tree v4hi_ftype_v4hi_v4hi
17347 = build_function_type (V4HI_type_node,
17348 tree_cons (NULL_TREE, V4HI_type_node,
17349 tree_cons (NULL_TREE, V4HI_type_node,
17351 tree v2si_ftype_v2si_v2si
17352 = build_function_type (V2SI_type_node,
17353 tree_cons (NULL_TREE, V2SI_type_node,
17354 tree_cons (NULL_TREE, V2SI_type_node,
17356 tree di_ftype_di_di
17357 = build_function_type (long_long_unsigned_type_node,
17358 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17359 tree_cons (NULL_TREE,
17360 long_long_unsigned_type_node,
17363 /* Add all builtins that are more or less simple operations on two operands.  */
17365 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17367 /* Use one of the operands; the target can have a different mode for
17368 mask-generating compares. */
17369 enum machine_mode mode;
17375 mode = insn_data[d->icode].operand[1].mode;
17380 type = v8qi_ftype_v8qi_v8qi;
17383 type = v4hi_ftype_v4hi_v4hi;
17386 type = v2si_ftype_v2si_v2si;
17389 type = di_ftype_di_di;
17393 gcc_unreachable ();
17396 def_mbuiltin (d->mask, d->name, type, d->code);
17399 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
17400 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17401 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
17404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17405 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17407 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17408 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17409 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17412 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17414 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17415 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17419 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17422 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17426 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17428 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17429 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17430 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
17434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
17439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17469 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17473 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17478 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17483 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17486 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17487 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17488 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17492 arm_init_tls_builtins (void)
17496 ftype = build_function_type (ptr_type_node, void_list_node);
17497 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17498 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17500 TREE_NOTHROW (decl) = 1;
17501 TREE_READONLY (decl) = 1;
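/* For example (illustrative), user code can then read the TLS base
   pointer with:

     void *tp = __builtin_thread_pointer ();  */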
17504 enum neon_builtin_type_bits {
17520 #define v8qi_UP T_V8QI
17521 #define v4hi_UP T_V4HI
17522 #define v2si_UP T_V2SI
17523 #define v2sf_UP T_V2SF
17525 #define v16qi_UP T_V16QI
17526 #define v8hi_UP T_V8HI
17527 #define v4si_UP T_V4SI
17528 #define v4sf_UP T_V4SF
17529 #define v2di_UP T_V2DI
17534 #define UP(X) X##_UP
17569 NEON_LOADSTRUCTLANE,
17571 NEON_STORESTRUCTLANE,
17578 typedef struct {
17579 const char *name;
17580 const neon_itype itype;
17581 const int bits;
17582 const enum insn_code codes[T_MAX];
17583 const unsigned int num_vars;
17584 unsigned int base_fcode;
17585 } neon_builtin_datum;
17587 #define CF(N,X) CODE_FOR_neon_##N##X
17589 #define VAR1(T, N, A) \
17590 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17591 #define VAR2(T, N, A, B) \
17592 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17593 #define VAR3(T, N, A, B, C) \
17594 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17595 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17596 #define VAR4(T, N, A, B, C, D) \
17597 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17598 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17599 #define VAR5(T, N, A, B, C, D, E) \
17600 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17601 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17602 #define VAR6(T, N, A, B, C, D, E, F) \
17603 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17604 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17605 #define VAR7(T, N, A, B, C, D, E, F, G) \
17606 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17607 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17609 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17610 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17612 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17613 CF (N, G), CF (N, H) }, 8, 0
17614 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17615 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17616 | UP (H) | UP (I), \
17617 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17618 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17619 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17620 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17621 | UP (H) | UP (I) | UP (J), \
17622 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17623 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
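/* For example, VAR2 (BINOP, vrecps, v2sf, v4sf) (used below) expands,
   via the *_UP macros, to:
     "vrecps", NEON_BINOP, T_V2SF | T_V4SF,
     { CODE_FOR_neon_vrecpsv2sf, CODE_FOR_neon_vrecpsv4sf }, 2, 0  */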
17625 /* The mode entries in the following table correspond to the "key" type of the
17626 instruction variant, i.e. equivalent to that which would be specified after
17627 the assembler mnemonic, which usually refers to the last vector operand.
17628 (Signed/unsigned/polynomial types are not differentiated, though;
17629 they are all mapped onto the same mode for a given element size.)  The modes
17630 listed per instruction should be the same as those defined for that
17631 instruction's pattern in neon.md.
17632 WARNING: Variants should be listed in the same increasing order as
17633 neon_builtin_type_bits. */
17635 static neon_builtin_datum neon_builtin_data[] =
17637 { VAR10 (BINOP, vadd,
17638 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17639 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17640 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17641 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17642 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17643 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17644 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17645 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17646 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17647 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17648 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17649 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17650 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17651 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17652 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17653 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17654 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17655 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17656 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17657 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17658 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17659 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17660 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17661 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17662 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17663 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17664 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17665 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17666 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17667 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17668 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17669 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17670 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17671 { VAR10 (BINOP, vsub,
17672 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17673 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17674 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17675 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17676 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17677 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17678 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17679 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17680 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17681 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17682 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17683 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17684 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17685 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17686 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17687 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17688 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17689 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17690 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17691 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17692 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17693 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17694 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17695 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17696 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17697 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17698 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17699 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17700 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17701 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17702 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17703 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17704 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17705 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17706 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17707 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17708 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17709 /* FIXME: vget_lane supports more variants than this! */
17710 { VAR10 (GETLANE, vget_lane,
17711 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17712 { VAR10 (SETLANE, vset_lane,
17713 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17714 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17715 { VAR10 (DUP, vdup_n,
17716 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17717 { VAR10 (DUPLANE, vdup_lane,
17718 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17719 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17720 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17721 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17722 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17723 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17724 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17725 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17726 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17727 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17728 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17729 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17730 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17731 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17732 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17733 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17734 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17735 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17736 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17737 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17738 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17739 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17740 { VAR10 (BINOP, vext,
17741 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17742 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17743 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17744 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17745 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17746 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17747 { VAR10 (SELECT, vbsl,
17748 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17749 { VAR1 (VTBL, vtbl1, v8qi) },
17750 { VAR1 (VTBL, vtbl2, v8qi) },
17751 { VAR1 (VTBL, vtbl3, v8qi) },
17752 { VAR1 (VTBL, vtbl4, v8qi) },
17753 { VAR1 (VTBX, vtbx1, v8qi) },
17754 { VAR1 (VTBX, vtbx2, v8qi) },
17755 { VAR1 (VTBX, vtbx3, v8qi) },
17756 { VAR1 (VTBX, vtbx4, v8qi) },
17757 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17758 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17759 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17760 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17761 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17762 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17763 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17764 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17765 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17766 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17767 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17768 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17769 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17770 { VAR10 (LOAD1, vld1,
17771 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17772 { VAR10 (LOAD1LANE, vld1_lane,
17773 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17774 { VAR10 (LOAD1, vld1_dup,
17775 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17776 { VAR10 (STORE1, vst1,
17777 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17778 { VAR10 (STORE1LANE, vst1_lane,
17779 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17780 { VAR9 (LOADSTRUCT,
17781 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17782 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17783 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17784 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17785 { VAR9 (STORESTRUCT, vst2,
17786 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17787 { VAR7 (STORESTRUCTLANE, vst2_lane,
17788 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17789 { VAR9 (LOADSTRUCT,
17790 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17791 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17792 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17793 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17794 { VAR9 (STORESTRUCT, vst3,
17795 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17796 { VAR7 (STORESTRUCTLANE, vst3_lane,
17797 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17798 { VAR9 (LOADSTRUCT, vld4,
17799 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17800 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17801 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17802 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17803 { VAR9 (STORESTRUCT, vst4,
17804 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17805 { VAR7 (STORESTRUCTLANE, vst4_lane,
17806 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17807 { VAR10 (LOGICBINOP, vand,
17808 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17809 { VAR10 (LOGICBINOP, vorr,
17810 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17811 { VAR10 (BINOP, veor,
17812 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17813 { VAR10 (LOGICBINOP, vbic,
17814 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17815 { VAR10 (LOGICBINOP, vorn,
17816 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17832 arm_init_neon_builtins (void)
17834 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17836 tree neon_intQI_type_node;
17837 tree neon_intHI_type_node;
17838 tree neon_polyQI_type_node;
17839 tree neon_polyHI_type_node;
17840 tree neon_intSI_type_node;
17841 tree neon_intDI_type_node;
17842 tree neon_float_type_node;
17844 tree intQI_pointer_node;
17845 tree intHI_pointer_node;
17846 tree intSI_pointer_node;
17847 tree intDI_pointer_node;
17848 tree float_pointer_node;
17850 tree const_intQI_node;
17851 tree const_intHI_node;
17852 tree const_intSI_node;
17853 tree const_intDI_node;
17854 tree const_float_node;
17856 tree const_intQI_pointer_node;
17857 tree const_intHI_pointer_node;
17858 tree const_intSI_pointer_node;
17859 tree const_intDI_pointer_node;
17860 tree const_float_pointer_node;
17862 tree V8QI_type_node;
17863 tree V4HI_type_node;
17864 tree V2SI_type_node;
17865 tree V2SF_type_node;
17866 tree V16QI_type_node;
17867 tree V8HI_type_node;
17868 tree V4SI_type_node;
17869 tree V4SF_type_node;
17870 tree V2DI_type_node;
17872 tree intUQI_type_node;
17873 tree intUHI_type_node;
17874 tree intUSI_type_node;
17875 tree intUDI_type_node;
17877 tree intEI_type_node;
17878 tree intOI_type_node;
17879 tree intCI_type_node;
17880 tree intXI_type_node;
17882 tree V8QI_pointer_node;
17883 tree V4HI_pointer_node;
17884 tree V2SI_pointer_node;
17885 tree V2SF_pointer_node;
17886 tree V16QI_pointer_node;
17887 tree V8HI_pointer_node;
17888 tree V4SI_pointer_node;
17889 tree V4SF_pointer_node;
17890 tree V2DI_pointer_node;
17892 tree void_ftype_pv8qi_v8qi_v8qi;
17893 tree void_ftype_pv4hi_v4hi_v4hi;
17894 tree void_ftype_pv2si_v2si_v2si;
17895 tree void_ftype_pv2sf_v2sf_v2sf;
17896 tree void_ftype_pdi_di_di;
17897 tree void_ftype_pv16qi_v16qi_v16qi;
17898 tree void_ftype_pv8hi_v8hi_v8hi;
17899 tree void_ftype_pv4si_v4si_v4si;
17900 tree void_ftype_pv4sf_v4sf_v4sf;
17901 tree void_ftype_pv2di_v2di_v2di;
17903 tree reinterp_ftype_dreg[5][5];
17904 tree reinterp_ftype_qreg[5][5];
17905 tree dreg_types[5], qreg_types[5];
17907 /* Create distinguished type nodes for NEON vector element types,
17908 and pointers to values of such types, so we can detect them later. */
17909 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17910 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17911 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17912 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17913 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17914 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17915 neon_float_type_node = make_node (REAL_TYPE);
17916 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17917 layout_type (neon_float_type_node);
17919 /* Define typedefs which exactly correspond to the modes we are basing vector
17920 types on. If you change these names you'll need to change
17921 the table used by arm_mangle_type too. */
17922 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17923 "__builtin_neon_qi");
17924 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17925 "__builtin_neon_hi");
17926 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17927 "__builtin_neon_si");
17928 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17929 "__builtin_neon_sf");
17930 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17931 "__builtin_neon_di");
17932 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17933 "__builtin_neon_poly8");
17934 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17935 "__builtin_neon_poly16");
17937 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17938 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17939 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17940 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17941 float_pointer_node = build_pointer_type (neon_float_type_node);
17943 /* Next create constant-qualified versions of the above types. */
17944 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17945 TYPE_QUAL_CONST);
17946 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17947 TYPE_QUAL_CONST);
17948 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17949 TYPE_QUAL_CONST);
17950 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17951 TYPE_QUAL_CONST);
17952 const_float_node = build_qualified_type (neon_float_type_node,
17953 TYPE_QUAL_CONST);
17955 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17956 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17957 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17958 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17959 const_float_pointer_node = build_pointer_type (const_float_node);
17961 /* Now create vector types based on our NEON element types. */
17962 /* 64-bit vectors. */
17963 V8QI_type_node =
17964 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17965 V4HI_type_node =
17966 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17967 V2SI_type_node =
17968 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17969 V2SF_type_node =
17970 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17971 /* 128-bit vectors. */
17972 V16QI_type_node =
17973 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17974 V8HI_type_node =
17975 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17976 V4SI_type_node =
17977 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17978 V4SF_type_node =
17979 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17980 V2DI_type_node =
17981 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17983 /* Unsigned integer types for various mode sizes. */
17984 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17985 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17986 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17987 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17989 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17990 "__builtin_neon_uqi");
17991 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17992 "__builtin_neon_uhi");
17993 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17994 "__builtin_neon_usi");
17995 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17996 "__builtin_neon_udi");
17998 /* Opaque integer types for structures of vectors. */
17999 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18000 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18001 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18002 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18004 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18005 "__builtin_neon_ti");
18006 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18007 "__builtin_neon_ei");
18008 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18009 "__builtin_neon_oi");
18010 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18011 "__builtin_neon_ci");
18012 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18013 "__builtin_neon_xi");
18015 /* Pointers to vector types. */
18016 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18017 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18018 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18019 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18020 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18021 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18022 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18023 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18024 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18026 /* Operations which return results as pairs. */
18027 void_ftype_pv8qi_v8qi_v8qi =
18028 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18029 V8QI_type_node, NULL);
18030 void_ftype_pv4hi_v4hi_v4hi =
18031 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18032 V4HI_type_node, NULL);
18033 void_ftype_pv2si_v2si_v2si =
18034 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18035 V2SI_type_node, NULL);
18036 void_ftype_pv2sf_v2sf_v2sf =
18037 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18038 V2SF_type_node, NULL);
18039 void_ftype_pdi_di_di =
18040 build_function_type_list (void_type_node, intDI_pointer_node,
18041 neon_intDI_type_node, neon_intDI_type_node, NULL);
18042 void_ftype_pv16qi_v16qi_v16qi =
18043 build_function_type_list (void_type_node, V16QI_pointer_node,
18044 V16QI_type_node, V16QI_type_node, NULL);
18045 void_ftype_pv8hi_v8hi_v8hi =
18046 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18047 V8HI_type_node, NULL);
18048 void_ftype_pv4si_v4si_v4si =
18049 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18050 V4SI_type_node, NULL);
18051 void_ftype_pv4sf_v4sf_v4sf =
18052 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18053 V4SF_type_node, NULL);
18054 void_ftype_pv2di_v2di_v2di =
18055 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18056 V2DI_type_node, NULL);
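/* These void (T *, T, T) signatures serve the RESULTPAIR builtins
   (vtrn, vzip, vuzp), which return two vectors by storing both halves
   of the result through the pointer argument; see
   neon_emit_pair_result_insn below.  */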
18058 dreg_types[0] = V8QI_type_node;
18059 dreg_types[1] = V4HI_type_node;
18060 dreg_types[2] = V2SI_type_node;
18061 dreg_types[3] = V2SF_type_node;
18062 dreg_types[4] = neon_intDI_type_node;
18064 qreg_types[0] = V16QI_type_node;
18065 qreg_types[1] = V8HI_type_node;
18066 qreg_types[2] = V4SI_type_node;
18067 qreg_types[3] = V4SF_type_node;
18068 qreg_types[4] = V2DI_type_node;
18070 for (i = 0; i < 5; i++)
18073 for (j = 0; j < 5; j++)
18075 reinterp_ftype_dreg[i][j]
18076 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18077 reinterp_ftype_qreg[i][j]
18078 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
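/* E.g. (illustrative) reinterp_ftype_dreg[0][1] is the type
   "V8QI (V4HI)", which becomes the signature of
   __builtin_neon_vreinterpretv8qiv4hi.  */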
18082 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18084 neon_builtin_datum *d = &neon_builtin_data[i];
18085 unsigned int j, codeidx = 0;
18087 d->base_fcode = fcode;
18089 for (j = 0; j < T_MAX; j++)
18091 const char* const modenames[] = {
18092 "v8qi", "v4hi", "v2si", "v2sf", "di",
18093 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18097 enum insn_code icode;
18098 int is_load = 0, is_store = 0;
18100 if ((d->bits & (1 << j)) == 0)
18103 icode = d->codes[codeidx++];
18108 case NEON_LOAD1LANE:
18109 case NEON_LOADSTRUCT:
18110 case NEON_LOADSTRUCTLANE:
18112 /* Fall through. */
18114 case NEON_STORE1LANE:
18115 case NEON_STORESTRUCT:
18116 case NEON_STORESTRUCTLANE:
18119 /* Fall through. */
18122 case NEON_LOGICBINOP:
18123 case NEON_SHIFTINSERT:
18130 case NEON_SHIFTIMM:
18131 case NEON_SHIFTACC:
18137 case NEON_LANEMULL:
18138 case NEON_LANEMULH:
18140 case NEON_SCALARMUL:
18141 case NEON_SCALARMULL:
18142 case NEON_SCALARMULH:
18143 case NEON_SCALARMAC:
18149 tree return_type = void_type_node, args = void_list_node;
18151 /* Build a function type directly from the insn_data for this
18152 builtin. The build_function_type() function takes care of
18153 removing duplicates for us. */
18154 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18158 if (is_load && k == 1)
18160 /* Neon load patterns always have the memory operand
18161 (a SImode pointer) in the operand 1 position. We
18162 want a const pointer to the element type in that
18163 position. */
18164 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18170 eltype = const_intQI_pointer_node;
18175 eltype = const_intHI_pointer_node;
18180 eltype = const_intSI_pointer_node;
18185 eltype = const_float_pointer_node;
18190 eltype = const_intDI_pointer_node;
18193 default: gcc_unreachable ();
18196 else if (is_store && k == 0)
18198 /* Similarly, Neon store patterns use operand 0 as
18199 the memory location to store to (a SImode pointer).
18200 Use a pointer to the element type of the store in
18201 that position. */
18202 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18208 eltype = intQI_pointer_node;
18213 eltype = intHI_pointer_node;
18218 eltype = intSI_pointer_node;
18223 eltype = float_pointer_node;
18228 eltype = intDI_pointer_node;
18231 default: gcc_unreachable ();
18236 switch (insn_data[icode].operand[k].mode)
18238 case VOIDmode: eltype = void_type_node; break;
18240 case QImode: eltype = neon_intQI_type_node; break;
18241 case HImode: eltype = neon_intHI_type_node; break;
18242 case SImode: eltype = neon_intSI_type_node; break;
18243 case SFmode: eltype = neon_float_type_node; break;
18244 case DImode: eltype = neon_intDI_type_node; break;
18245 case TImode: eltype = intTI_type_node; break;
18246 case EImode: eltype = intEI_type_node; break;
18247 case OImode: eltype = intOI_type_node; break;
18248 case CImode: eltype = intCI_type_node; break;
18249 case XImode: eltype = intXI_type_node; break;
18250 /* 64-bit vectors. */
18251 case V8QImode: eltype = V8QI_type_node; break;
18252 case V4HImode: eltype = V4HI_type_node; break;
18253 case V2SImode: eltype = V2SI_type_node; break;
18254 case V2SFmode: eltype = V2SF_type_node; break;
18255 /* 128-bit vectors. */
18256 case V16QImode: eltype = V16QI_type_node; break;
18257 case V8HImode: eltype = V8HI_type_node; break;
18258 case V4SImode: eltype = V4SI_type_node; break;
18259 case V4SFmode: eltype = V4SF_type_node; break;
18260 case V2DImode: eltype = V2DI_type_node; break;
18261 default: gcc_unreachable ();
18265 if (k == 0 && !is_store)
18266 return_type = eltype;
18268 args = tree_cons (NULL_TREE, eltype, args);
18271 ftype = build_function_type (return_type, args);
18275 case NEON_RESULTPAIR:
18277 switch (insn_data[icode].operand[1].mode)
18279 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18280 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18281 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18282 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18283 case DImode: ftype = void_ftype_pdi_di_di; break;
18284 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18285 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18286 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18287 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18288 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18289 default: gcc_unreachable ();
18294 case NEON_REINTERP:
18296 /* We iterate over 5 doubleword types, then 5 quadword
18297 types. */
18298 int rhs = j % 5;
18299 switch (insn_data[icode].operand[0].mode)
18301 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18302 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18303 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18304 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18305 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18306 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18307 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18308 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18309 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18310 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18311 default: gcc_unreachable ();
18317 gcc_unreachable ();
18320 gcc_assert (ftype != NULL);
18322 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
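/* E.g. (illustrative) the v8qi variant of vsub above is registered
   here as "__builtin_neon_vsubv8qi".  */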
18324 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
18331 arm_init_fp16_builtins (void)
18333 tree fp16_type = make_node (REAL_TYPE);
18334 TYPE_PRECISION (fp16_type) = 16;
18335 layout_type (fp16_type);
18336 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
18340 arm_init_builtins (void)
18342 arm_init_tls_builtins ();
18344 if (TARGET_REALLY_IWMMXT)
18345 arm_init_iwmmxt_builtins ();
18348 arm_init_neon_builtins ();
18350 if (arm_fp16_format)
18351 arm_init_fp16_builtins ();
18354 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18356 static const char *
18357 arm_invalid_parameter_type (const_tree t)
18359 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18360 return N_("function parameters cannot have __fp16 type");
18364 /* Implement TARGET_INVALID_RETURN_TYPE. */
18366 static const char *
18367 arm_invalid_return_type (const_tree t)
18369 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18370 return N_("functions cannot return __fp16 type");
18374 /* Implement TARGET_PROMOTED_TYPE. */
18377 arm_promoted_type (const_tree t)
18379 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18380 return float_type_node;
18384 /* Implement TARGET_CONVERT_TO_TYPE.
18385 Specifically, this hook implements the peculiarity of the ARM
18386 half-precision floating-point C semantics that requires conversions between
18387 __fp16 and double to go through an intermediate conversion to float. */
18390 arm_convert_to_type (tree type, tree expr)
18392 tree fromtype = TREE_TYPE (expr);
18393 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
18395 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
18396 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
18397 return convert (type, convert (float_type_node, expr));
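/* E.g. (illustrative) given "__fp16 h; double d;", the conversion in
   "d = h" expands as (double)(float)h, and "h = d" as
   (__fp16)(float)d, per the hook's contract above.  */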
18401 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18402 This simply adds HFmode as a supported mode; even though we don't
18403 implement arithmetic on this type directly, it's supported by
18404 optabs conversions, much the way the double-word arithmetic is
18405 special-cased in the default hook. */
18408 arm_scalar_mode_supported_p (enum machine_mode mode)
18410 if (mode == HFmode)
18411 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
18413 return default_scalar_mode_supported_p (mode);
18416 /* Errors in the source file can cause expand_expr to return const0_rtx
18417 where we expect a vector. To avoid crashing, use one of the vector
18418 clear instructions. */
18421 safe_vector_operand (rtx x, enum machine_mode mode)
18423 if (x != const0_rtx)
18424 return x;
18425 x = gen_reg_rtx (mode);
18427 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
18428 : gen_rtx_SUBREG (DImode, x, 0)));
18432 /* Subroutine of arm_expand_builtin to take care of binop insns. */
18435 arm_expand_binop_builtin (enum insn_code icode,
18436 tree exp, rtx target)
18439 tree arg0 = CALL_EXPR_ARG (exp, 0);
18440 tree arg1 = CALL_EXPR_ARG (exp, 1);
18441 rtx op0 = expand_normal (arg0);
18442 rtx op1 = expand_normal (arg1);
18443 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18444 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18445 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18447 if (VECTOR_MODE_P (mode0))
18448 op0 = safe_vector_operand (op0, mode0);
18449 if (VECTOR_MODE_P (mode1))
18450 op1 = safe_vector_operand (op1, mode1);
18452 if (target == 0
18453 || GET_MODE (target) != tmode
18454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18455 target = gen_reg_rtx (tmode);
18457 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18459 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18460 op0 = copy_to_mode_reg (mode0, op0);
18461 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18462 op1 = copy_to_mode_reg (mode1, op1);
18464 pat = GEN_FCN (icode) (target, op0, op1);
18471 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18474 arm_expand_unop_builtin (enum insn_code icode,
18475 tree exp, rtx target, int do_load)
18478 tree arg0 = CALL_EXPR_ARG (exp, 0);
18479 rtx op0 = expand_normal (arg0);
18480 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18481 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18483 if (target == 0
18484 || GET_MODE (target) != tmode
18485 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18486 target = gen_reg_rtx (tmode);
18488 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18491 if (VECTOR_MODE_P (mode0))
18492 op0 = safe_vector_operand (op0, mode0);
18494 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18495 op0 = copy_to_mode_reg (mode0, op0);
18498 pat = GEN_FCN (icode) (target, op0);
18506 neon_builtin_compare (const void *a, const void *b)
18508 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18509 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18510 unsigned int soughtcode = key->base_fcode;
18512 if (soughtcode >= memb->base_fcode
18513 && soughtcode < memb->base_fcode + memb->num_vars)
18515 else if (soughtcode < memb->base_fcode)
18521 static enum insn_code
18522 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18524 neon_builtin_datum key, *found;
18527 key.base_fcode = fcode;
18528 found = (neon_builtin_datum *)
18529 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18530 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18531 gcc_assert (found);
18532 idx = fcode - (int) found->base_fcode;
18533 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18536 *itype = found->itype;
18538 return found->codes[idx];
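/* So if a datum's base_fcode is F and it has N variants, any fcode in
   [F, F + N) resolves to that datum, and fcode - F indexes its
   codes[] array (illustrative restatement of the lookup above).  */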
18541 typedef enum {
18542 NEON_ARG_COPY_TO_REG,
18543 NEON_ARG_CONSTANT,
18544 NEON_ARG_STOP
18545 } builtin_arg;
18547 #define NEON_MAX_BUILTIN_ARGS 5
18549 /* Expand a Neon builtin. */
18550 static rtx
18551 arm_expand_neon_args (rtx target, int icode, int have_retval,
18552 tree exp, ...)
18553 {
18554 va_list ap;
18555 rtx pat;
18556 tree arg[NEON_MAX_BUILTIN_ARGS];
18557 rtx op[NEON_MAX_BUILTIN_ARGS];
18558 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18559 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18560 int argc = 0;
18562 if (have_retval
18563 && (target == 0
18564 || GET_MODE (target) != tmode
18565 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18566 target = gen_reg_rtx (tmode);
18568 va_start (ap, exp);
18572 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18574 if (thisarg == NEON_ARG_STOP)
18578 arg[argc] = CALL_EXPR_ARG (exp, argc);
18579 op[argc] = expand_normal (arg[argc]);
18580 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18584 case NEON_ARG_COPY_TO_REG:
18585 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18586 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18587 (op[argc], mode[argc]))
18588 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18591 case NEON_ARG_CONSTANT:
18592 /* FIXME: This error message is somewhat unhelpful. */
18593 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18594 (op[argc], mode[argc]))
18595 error ("argument must be a constant");
18598 case NEON_ARG_STOP:
18599 gcc_unreachable ();
18612 pat = GEN_FCN (icode) (target, op[0]);
18616 pat = GEN_FCN (icode) (target, op[0], op[1]);
18620 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18624 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18628 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18632 gcc_unreachable ();
18638 pat = GEN_FCN (icode) (op[0]);
18642 pat = GEN_FCN (icode) (op[0], op[1]);
18646 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18650 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18654 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18658 gcc_unreachable ();
18669 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18670 constants defined per-instruction or per instruction-variant. Instead, the
18671 required info is looked up in the table neon_builtin_data. */
18673 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18676 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18683 return arm_expand_neon_args (target, icode, 1, exp,
18684 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18688 case NEON_SCALARMUL:
18689 case NEON_SCALARMULL:
18690 case NEON_SCALARMULH:
18691 case NEON_SHIFTINSERT:
18692 case NEON_LOGICBINOP:
18693 return arm_expand_neon_args (target, icode, 1, exp,
18694 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18698 return arm_expand_neon_args (target, icode, 1, exp,
18699 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18700 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18704 case NEON_SHIFTIMM:
18705 return arm_expand_neon_args (target, icode, 1, exp,
18706 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18710 return arm_expand_neon_args (target, icode, 1, exp,
18711 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18715 case NEON_REINTERP:
18716 return arm_expand_neon_args (target, icode, 1, exp,
18717 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18721 return arm_expand_neon_args (target, icode, 1, exp,
18722 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18724 case NEON_RESULTPAIR:
18725 return arm_expand_neon_args (target, icode, 0, exp,
18726 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18730 case NEON_LANEMULL:
18731 case NEON_LANEMULH:
18732 return arm_expand_neon_args (target, icode, 1, exp,
18733 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18734 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18737 return arm_expand_neon_args (target, icode, 1, exp,
18738 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18739 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18741 case NEON_SHIFTACC:
18742 return arm_expand_neon_args (target, icode, 1, exp,
18743 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18744 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18746 case NEON_SCALARMAC:
18747 return arm_expand_neon_args (target, icode, 1, exp,
18748 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18749 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18753 return arm_expand_neon_args (target, icode, 1, exp,
18754 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18758 case NEON_LOADSTRUCT:
18759 return arm_expand_neon_args (target, icode, 1, exp,
18760 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18762 case NEON_LOAD1LANE:
18763 case NEON_LOADSTRUCTLANE:
18764 return arm_expand_neon_args (target, icode, 1, exp,
18765 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18769 case NEON_STORESTRUCT:
18770 return arm_expand_neon_args (target, icode, 0, exp,
18771 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18773 case NEON_STORE1LANE:
18774 case NEON_STORESTRUCTLANE:
18775 return arm_expand_neon_args (target, icode, 0, exp,
18776 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18780 gcc_unreachable ();
18783 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18785 neon_reinterpret (rtx dest, rtx src)
18787 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18790 /* Emit code to place a Neon pair result in memory locations (with equal
18791 registers). */
18792 void
18793 neon_emit_pair_result_insn (enum machine_mode mode,
18794 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18797 rtx mem = gen_rtx_MEM (mode, destaddr);
18798 rtx tmp1 = gen_reg_rtx (mode);
18799 rtx tmp2 = gen_reg_rtx (mode);
18801 emit_insn (intfn (tmp1, op1, tmp2, op2));
18803 emit_move_insn (mem, tmp1);
18804 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18805 emit_move_insn (mem, tmp2);
18808 /* Set up operands for a register copy from src to dest, taking care not to
18809 clobber registers in the process.
18810 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18811 be called with a large N, so that should be OK. */
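/* E.g. (illustrative) with dest = {d1, d2} and src = {d0, d1},
   emitting d1 := d0 first would clobber the source of d2 := d1, so
   the scan below orders the copies as d2 := d1, then d1 := d0.  */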
18814 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18816 unsigned int copied = 0, opctr = 0;
18817 unsigned int done = (1 << count) - 1;
18820 while (copied != done)
18822 for (i = 0; i < count; i++)
18826 for (j = 0; good && j < count; j++)
18827 if (i != j && (copied & (1 << j)) == 0
18828 && reg_overlap_mentioned_p (src[j], dest[i]))
18833 operands[opctr++] = dest[i];
18834 operands[opctr++] = src[i];
18840 gcc_assert (opctr == count * 2);
18843 /* Expand an expression EXP that calls a built-in function,
18844 with result going to TARGET if that's convenient
18845 (and in mode MODE if that's convenient).
18846 SUBTARGET may be used as the target for computing one of EXP's operands.
18847 IGNORE is nonzero if the value is to be ignored. */
18850 arm_expand_builtin (tree exp,
18852 rtx subtarget ATTRIBUTE_UNUSED,
18853 enum machine_mode mode ATTRIBUTE_UNUSED,
18854 int ignore ATTRIBUTE_UNUSED)
18856 const struct builtin_description * d;
18857 enum insn_code icode;
18858 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18866 int fcode = DECL_FUNCTION_CODE (fndecl);
18868 enum machine_mode tmode;
18869 enum machine_mode mode0;
18870 enum machine_mode mode1;
18871 enum machine_mode mode2;
18873 if (fcode >= ARM_BUILTIN_NEON_BASE)
18874 return arm_expand_neon_builtin (fcode, exp, target);
18878 case ARM_BUILTIN_TEXTRMSB:
18879 case ARM_BUILTIN_TEXTRMUB:
18880 case ARM_BUILTIN_TEXTRMSH:
18881 case ARM_BUILTIN_TEXTRMUH:
18882 case ARM_BUILTIN_TEXTRMSW:
18883 case ARM_BUILTIN_TEXTRMUW:
18884 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18885 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18886 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18887 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18888 : CODE_FOR_iwmmxt_textrmw);
18890 arg0 = CALL_EXPR_ARG (exp, 0);
18891 arg1 = CALL_EXPR_ARG (exp, 1);
18892 op0 = expand_normal (arg0);
18893 op1 = expand_normal (arg1);
18894 tmode = insn_data[icode].operand[0].mode;
18895 mode0 = insn_data[icode].operand[1].mode;
18896 mode1 = insn_data[icode].operand[2].mode;
18898 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18899 op0 = copy_to_mode_reg (mode0, op0);
18900 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18902 /* @@@ better error message */
18903 error ("selector must be an immediate");
18904 return gen_reg_rtx (tmode);
18906 if (target == 0
18907 || GET_MODE (target) != tmode
18908 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18909 target = gen_reg_rtx (tmode);
18910 pat = GEN_FCN (icode) (target, op0, op1);
18916 case ARM_BUILTIN_TINSRB:
18917 case ARM_BUILTIN_TINSRH:
18918 case ARM_BUILTIN_TINSRW:
18919 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18920 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18921 : CODE_FOR_iwmmxt_tinsrw);
18922 arg0 = CALL_EXPR_ARG (exp, 0);
18923 arg1 = CALL_EXPR_ARG (exp, 1);
18924 arg2 = CALL_EXPR_ARG (exp, 2);
18925 op0 = expand_normal (arg0);
18926 op1 = expand_normal (arg1);
18927 op2 = expand_normal (arg2);
18928 tmode = insn_data[icode].operand[0].mode;
18929 mode0 = insn_data[icode].operand[1].mode;
18930 mode1 = insn_data[icode].operand[2].mode;
18931 mode2 = insn_data[icode].operand[3].mode;
18933 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18934 op0 = copy_to_mode_reg (mode0, op0);
18935 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18936 op1 = copy_to_mode_reg (mode1, op1);
18937 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18939 /* @@@ better error message */
18940 error ("selector must be an immediate");
18943 if (target == 0
18944 || GET_MODE (target) != tmode
18945 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18946 target = gen_reg_rtx (tmode);
18947 pat = GEN_FCN (icode) (target, op0, op1, op2);
18953 case ARM_BUILTIN_SETWCX:
18954 arg0 = CALL_EXPR_ARG (exp, 0);
18955 arg1 = CALL_EXPR_ARG (exp, 1);
18956 op0 = force_reg (SImode, expand_normal (arg0));
18957 op1 = expand_normal (arg1);
18958 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18961 case ARM_BUILTIN_GETWCX:
18962 arg0 = CALL_EXPR_ARG (exp, 0);
18963 op0 = expand_normal (arg0);
18964 target = gen_reg_rtx (SImode);
18965 emit_insn (gen_iwmmxt_tmrc (target, op0));
18968 case ARM_BUILTIN_WSHUFH:
18969 icode = CODE_FOR_iwmmxt_wshufh;
18970 arg0 = CALL_EXPR_ARG (exp, 0);
18971 arg1 = CALL_EXPR_ARG (exp, 1);
18972 op0 = expand_normal (arg0);
18973 op1 = expand_normal (arg1);
18974 tmode = insn_data[icode].operand[0].mode;
18975 mode1 = insn_data[icode].operand[1].mode;
18976 mode2 = insn_data[icode].operand[2].mode;
18978 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18979 op0 = copy_to_mode_reg (mode1, op0);
18980 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18982 /* @@@ better error message */
18983 error ("mask must be an immediate");
18986 if (target == 0
18987 || GET_MODE (target) != tmode
18988 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18989 target = gen_reg_rtx (tmode);
18990 pat = GEN_FCN (icode) (target, op0, op1);
18996 case ARM_BUILTIN_WSADB:
18997 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18998 case ARM_BUILTIN_WSADH:
18999 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19000 case ARM_BUILTIN_WSADBZ:
19001 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19002 case ARM_BUILTIN_WSADHZ:
19003 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19005 /* Several three-argument builtins. */
19006 case ARM_BUILTIN_WMACS:
19007 case ARM_BUILTIN_WMACU:
19008 case ARM_BUILTIN_WALIGN:
19009 case ARM_BUILTIN_TMIA:
19010 case ARM_BUILTIN_TMIAPH:
19011 case ARM_BUILTIN_TMIATT:
19012 case ARM_BUILTIN_TMIATB:
19013 case ARM_BUILTIN_TMIABT:
19014 case ARM_BUILTIN_TMIABB:
19015 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19016 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19017 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19018 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19019 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19020 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19021 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19022 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19023 : CODE_FOR_iwmmxt_walign);
19024 arg0 = CALL_EXPR_ARG (exp, 0);
19025 arg1 = CALL_EXPR_ARG (exp, 1);
19026 arg2 = CALL_EXPR_ARG (exp, 2);
19027 op0 = expand_normal (arg0);
19028 op1 = expand_normal (arg1);
19029 op2 = expand_normal (arg2);
19030 tmode = insn_data[icode].operand[0].mode;
19031 mode0 = insn_data[icode].operand[1].mode;
19032 mode1 = insn_data[icode].operand[2].mode;
19033 mode2 = insn_data[icode].operand[3].mode;
19035 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19036 op0 = copy_to_mode_reg (mode0, op0);
19037 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19038 op1 = copy_to_mode_reg (mode1, op1);
19039 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19040 op2 = copy_to_mode_reg (mode2, op2);
19041 if (target == 0
19042 || GET_MODE (target) != tmode
19043 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19044 target = gen_reg_rtx (tmode);
19045 pat = GEN_FCN (icode) (target, op0, op1, op2);
19051 case ARM_BUILTIN_WZERO:
19052 target = gen_reg_rtx (DImode);
19053 emit_insn (gen_iwmmxt_clrdi (target));
19056 case ARM_BUILTIN_THREAD_POINTER:
19057 return arm_load_tp (target);
19063 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19064 if (d->code == (const enum arm_builtins) fcode)
19065 return arm_expand_binop_builtin (d->icode, exp, target);
19067 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19068 if (d->code == (const enum arm_builtins) fcode)
19069 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19071 /* @@@ Should really do something sensible here. */
19075 /* Return the number (counting from 0) of
19076 the least significant set bit in MASK. */
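/* E.g. number_of_first_bit_set (0x18) == 3.  */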
19079 number_of_first_bit_set (unsigned mask)
19081 int bit;
19083 for (bit = 0;
19084 (mask & (1 << bit)) == 0;
19085 ++bit)
19086 continue;
19088 return bit;
19091 /* Emit code to push or pop registers to or from the stack. F is the
19092 assembly file. MASK is the registers to push or pop. PUSH is
19093 nonzero if we should push, and zero if we should pop. For debugging
19094 output, if pushing, adjust CFA_OFFSET by the amount of space added
19095 to the stack. REAL_REGS should have the same number of bits set as
19096 MASK, and will be used instead (in the same order) to describe which
19097 registers were saved - this is used to mark the save slots when we
19098 push high registers after moving them to low registers. */
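/* E.g. (illustrative) MASK = {r4-r7, lr} with PUSH nonzero emits
   "push {r4, r5, r6, r7, lr}", plus a matching ".save" directive
   when EABI unwind tables are enabled.  */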
19100 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19101 unsigned long real_regs)
19104 int lo_mask = mask & 0xFF;
19105 int pushed_words = 0;
19109 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19111 /* Special case. Do not generate a POP PC statement here, do it in
19112 thumb_exit() */
19113 thumb_exit (f, -1);
19117 if (ARM_EABI_UNWIND_TABLES && push)
19119 fprintf (f, "\t.save\t{");
19120 for (regno = 0; regno < 15; regno++)
19122 if (real_regs & (1 << regno))
19124 if (real_regs & ((1 << regno) -1))
19126 asm_fprintf (f, "%r", regno);
19129 fprintf (f, "}\n");
19132 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19134 /* Look at the low registers first. */
19135 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19139 asm_fprintf (f, "%r", regno);
19141 if ((lo_mask & ~1) != 0)
19148 if (push && (mask & (1 << LR_REGNUM)))
19150 /* Catch pushing the LR. */
19154 asm_fprintf (f, "%r", LR_REGNUM);
19158 else if (!push && (mask & (1 << PC_REGNUM)))
19160 /* Catch popping the PC. */
19161 if (TARGET_INTERWORK || TARGET_BACKTRACE
19162 || crtl->calls_eh_return)
19164 /* The PC is never popped directly; instead
19165 it is popped into r3 and then BX is used. */
19166 fprintf (f, "}\n");
19168 thumb_exit (f, -1);
19177 asm_fprintf (f, "%r", PC_REGNUM);
19181 fprintf (f, "}\n");
19183 if (push && pushed_words && dwarf2out_do_frame ())
19185 char *l = dwarf2out_cfi_label (false);
19186 int pushed_mask = real_regs;
19188 *cfa_offset += pushed_words * 4;
19189 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19192 pushed_mask = real_regs;
19193 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19195 if (pushed_mask & 1)
19196 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19201 /* Generate code to return from a thumb function.
19202 If 'reg_containing_return_addr' is -1, then the return address is
19203 actually on the stack, at the stack pointer. */
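/* E.g. (illustrative) when the return address is still in LR,
   nothing needs popping and there is no EH-return adjustment, this
   reduces to a single "bx lr".  */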
19205 thumb_exit (FILE *f, int reg_containing_return_addr)
19207 unsigned regs_available_for_popping;
19208 unsigned regs_to_pop;
19210 unsigned available;
19214 int restore_a4 = FALSE;
19216 /* Compute the registers we need to pop. */
19220 if (reg_containing_return_addr == -1)
19222 regs_to_pop |= 1 << LR_REGNUM;
19226 if (TARGET_BACKTRACE)
19228 /* Restore the (ARM) frame pointer and stack pointer. */
19229 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19233 /* If there is nothing to pop then just emit the BX instruction and
19234 return. */
19235 if (pops_needed == 0)
19237 if (crtl->calls_eh_return)
19238 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19240 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19243 /* Otherwise if we are not supporting interworking and we have not created
19244 a backtrace structure and the function was not entered in ARM mode then
19245 just pop the return address straight into the PC. */
19246 else if (!TARGET_INTERWORK
19247 && !TARGET_BACKTRACE
19248 && !is_called_in_ARM_mode (current_function_decl)
19249 && !crtl->calls_eh_return)
19251 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19255 /* Find out how many of the (return) argument registers we can corrupt. */
19256 regs_available_for_popping = 0;
19258 /* If returning via __builtin_eh_return, the bottom three registers
19259 all contain information needed for the return. */
19260 if (crtl->calls_eh_return)
19264 /* We can deduce the registers used from the function's
19265 return value. This is more reliable than examining
19266 df_regs_ever_live_p () because that will be set if the register is
19267 ever used in the function, not just if the register is used
19268 to hold a return value. */
19270 if (crtl->return_rtx != 0)
19271 mode = GET_MODE (crtl->return_rtx);
19273 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19275 size = GET_MODE_SIZE (mode);
19279 /* In a void function we can use any argument register.
19280 In a function that returns a structure on the stack
19281 we can use the second and third argument registers. */
19282 if (mode == VOIDmode)
19283 regs_available_for_popping =
19284 (1 << ARG_REGISTER (1))
19285 | (1 << ARG_REGISTER (2))
19286 | (1 << ARG_REGISTER (3));
19288 regs_available_for_popping =
19289 (1 << ARG_REGISTER (2))
19290 | (1 << ARG_REGISTER (3));
19292 else if (size <= 4)
19293 regs_available_for_popping =
19294 (1 << ARG_REGISTER (2))
19295 | (1 << ARG_REGISTER (3));
19296 else if (size <= 8)
19297 regs_available_for_popping =
19298 (1 << ARG_REGISTER (3));
19301 /* Match registers to be popped with registers into which we pop them. */
19302 for (available = regs_available_for_popping,
19303 required = regs_to_pop;
19304 required != 0 && available != 0;
19305 available &= ~(available & - available),
19306 required &= ~(required & - required))
19309 /* If we have any popping registers left over, remove them. */
19310 if (available > 0)
19311 regs_available_for_popping &= ~available;
19313 /* Otherwise if we need another popping register we can use
19314 the fourth argument register. */
19315 else if (pops_needed)
19317 /* If we have not found any free argument registers and
19318 reg a4 contains the return address, we must move it. */
19319 if (regs_available_for_popping == 0
19320 && reg_containing_return_addr == LAST_ARG_REGNUM)
19322 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19323 reg_containing_return_addr = LR_REGNUM;
19325 else if (size > 12)
19327 /* Register a4 is being used to hold part of the return value,
19328 but we have dire need of a free, low register. */
19331 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
19334 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19336 /* The fourth argument register is available. */
19337 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19343 /* Pop as many registers as we can. */
19344 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19345 regs_available_for_popping);
19347 /* Process the registers we popped. */
19348 if (reg_containing_return_addr == -1)
19350 /* The return address was popped into the lowest numbered register. */
19351 regs_to_pop &= ~(1 << LR_REGNUM);
19353 reg_containing_return_addr =
19354 number_of_first_bit_set (regs_available_for_popping);
19356 /* Remove this register from the mask of available registers, so that
19357 the return address will not be corrupted by further pops. */
19358 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19361 /* If we popped other registers then handle them here. */
19362 if (regs_available_for_popping)
19366 /* Work out which register currently contains the frame pointer. */
19367 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19369 /* Move it into the correct place. */
19370 asm_fprintf (f, "\tmov\t%r, %r\n",
19371 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19373 /* (Temporarily) remove it from the mask of popped registers. */
19374 regs_available_for_popping &= ~(1 << frame_pointer);
19375 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
19377 if (regs_available_for_popping)
19381 /* We popped the stack pointer as well,
19382 find the register that contains it. */
19383 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19385 /* Move it into the stack register. */
19386 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19388 /* At this point we have popped all necessary registers, so
19389 do not worry about restoring regs_available_for_popping
19390 to its correct value:
19392 assert (pops_needed == 0)
19393 assert (regs_available_for_popping == (1 << frame_pointer))
19394 assert (regs_to_pop == (1 << STACK_POINTER)) */
19398 /* Since we have just moved the popped value into the frame
19399 pointer, the popping register is available for reuse, and
19400 we know that we still have the stack pointer left to pop. */
19401 regs_available_for_popping |= (1 << frame_pointer);
19405 /* If we still have registers left on the stack, but we no longer have
19406 any registers into which we can pop them, then we must move the return
19407 address into the link register and make available the register that
19409 if (regs_available_for_popping == 0 && pops_needed > 0)
19411 regs_available_for_popping |= 1 << reg_containing_return_addr;
19413 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19414 reg_containing_return_addr);
19416 reg_containing_return_addr = LR_REGNUM;
19419 /* If we have registers left on the stack then pop some more.
19420 We know that at most we will want to pop FP and SP. */
19421 if (pops_needed > 0)
19426 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19427 regs_available_for_popping);
19429 /* We have popped either FP or SP.
19430 Move whichever one it is into the correct register. */
19431 popped_into = number_of_first_bit_set (regs_available_for_popping);
19432 move_to = number_of_first_bit_set (regs_to_pop);
19434 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19436 regs_to_pop &= ~(1 << move_to);
19441 /* If we still have not popped everything then we must have only
19442 had one register available to us and we are now popping the SP. */
19443 if (pops_needed > 0)
19447 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19448 regs_available_for_popping);
19450 popped_into = number_of_first_bit_set (regs_available_for_popping);
19452 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19454 /* assert (regs_to_pop == (1 << STACK_POINTER))
19455 assert (pops_needed == 1) */
19459 /* If necessary restore the a4 register. */
19462 if (reg_containing_return_addr != LR_REGNUM)
19464 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19465 reg_containing_return_addr = LR_REGNUM;
19468 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19471 if (crtl->calls_eh_return)
19472 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19474 /* Return to caller. */
19475 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19480 thumb1_final_prescan_insn (rtx insn)
19482 if (flag_print_asm_name)
19483 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19484 INSN_ADDRESSES (INSN_UID (insn)));
19488 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19490 unsigned HOST_WIDE_INT mask = 0xff;
19491 int i;
19493 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19494 if (val == 0) /* XXX */
19495 return 0;
19497 for (i = 0; i < 25; i++)
19498 if ((val & (mask << i)) == val)
19499 return 1;
19501 return 0;
19504 /* Returns nonzero if the current function contains,
19505 or might contain a far jump. */
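/* (Illustrative summary.)  A "far jump" is a branch that the short
   Thumb branch encodings cannot reach; it is synthesized with a
   sequence that clobbers LR, which is why LR must then be saved on
   the stack.  */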
19507 thumb_far_jump_used_p (void)
19511 /* This test is only important for leaf functions. */
19512 /* assert (!leaf_function_p ()); */
19514 /* If we have already decided that far jumps may be used,
19515 do not bother checking again, and always return true even if
19516 it turns out that they are not being used. Once we have made
19517 the decision that far jumps are present (and that hence the link
19518 register will be pushed onto the stack) we cannot go back on it. */
19519 if (cfun->machine->far_jump_used)
19522 /* If this function is not being called from the prologue/epilogue
19523 generation code then it must be being called from the
19524 INITIAL_ELIMINATION_OFFSET macro. */
19525 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19527 /* In this case we know that we are being asked about the elimination
19528 of the arg pointer register. If that register is not being used,
19529 then there are no arguments on the stack, and we do not have to
19530 worry that a far jump might force the prologue to push the link
19531 register, changing the stack offsets. In this case we can just
19532 return false, since the presence of far jumps in the function will
19533 not affect stack offsets.
19535 If the arg pointer is live (or if it was live, but has now been
19536 eliminated and so set to dead) then we do have to test to see if
19537 the function might contain a far jump. This test can lead to some
19538 false positives, since before reload is completed the length of
19539 branch instructions is not known, so gcc defaults to returning their
19540 longest length, which in turn sets the far jump attribute to true.
19542 A false positive will not result in bad code being generated, but it
19543 will result in a needless push and pop of the link register. We
19544 hope that this does not occur too often.
19546 If we need doubleword stack alignment this could affect the other
19547 elimination offsets so we can't risk getting it wrong. */
19548 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19549 cfun->machine->arg_pointer_live = 1;
19550 else if (!cfun->machine->arg_pointer_live)
19554 /* Check to see if the function contains a branch
19555 insn with the far jump attribute set. */
19556 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19558 if (GET_CODE (insn) == JUMP_INSN
19559 /* Ignore tablejump patterns. */
19560 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19561 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19562 && get_attr_far_jump (insn) == FAR_JUMP_YES
19565 /* Record the fact that we have decided that
19566 the function does use far jumps. */
19567 cfun->machine->far_jump_used = 1;
19575 /* Return nonzero if FUNC must be entered in ARM mode. */
19577 is_called_in_ARM_mode (tree func)
19579 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19581 /* Ignore the problem about functions whose address is taken. */
19582 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19586 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19592 /* Given the stack offsets and register mask in OFFSETS, decide how
19593 many additional registers to push instead of subtracting a constant
19594 from SP. For epilogues the principle is the same except we use pop.
19595 FOR_PROLOGUE indicates which we're generating. */
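/* E.g. (illustrative) a prologue that would emit "push {r4, lr}"
   followed by "sub sp, #8" can instead emit "push {r2, r3, r4, lr}",
   using two dead argument registers to allocate the 8 bytes.  */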
19597 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
19599 HOST_WIDE_INT amount;
19600 unsigned long live_regs_mask = offsets->saved_regs_mask;
19601 /* Extract a mask of the ones we can give to the Thumb's push/pop
19602 instruction. */
19603 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
19604 /* Then count how many other high registers will need to be pushed. */
19605 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19606 int n_free, reg_base;
19608 if (!for_prologue && frame_pointer_needed)
19609 amount = offsets->locals_base - offsets->saved_regs;
19611 amount = offsets->outgoing_args - offsets->saved_regs;
19613 /* If the stack frame size is 512 exactly, we can save one load
19614 instruction, which should make this a win even when optimizing
19615 for size. */
19616 if (!optimize_size && amount != 512)
19619 /* Can't do this if there are high registers to push. */
19620 if (high_regs_pushed != 0)
19623 /* Shouldn't do it in the prologue if no registers would normally
19624 be pushed at all. In the epilogue, also allow it if we'll have
19625 a pop insn for the PC. */
19628 || TARGET_BACKTRACE
19629 || (live_regs_mask & 1 << LR_REGNUM) == 0
19630 || TARGET_INTERWORK
19631 || crtl->args.pretend_args_size != 0))
19634 /* Don't do this if thumb_expand_prologue wants to emit instructions
19635 between the push and the stack frame allocation. */
19637 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
19638 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
19645 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
19646 live_regs_mask >>= reg_base;
19649 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
19650 && (for_prologue || call_used_regs[reg_base + n_free]))
19652 live_regs_mask >>= 1;
19658 gcc_assert (amount / 4 * 4 == amount);
19660 if (amount >= 512 && (amount - n_free * 4) < 512)
19661 return (amount - 508) / 4;
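/* Worked example (illustrative): with amount == 512 and one free
register (n_free == 1), amount - n_free * 4 == 508 < 512, so we
return (512 - 508) / 4 == 1; pushing one extra register leaves a
508-byte adjustment, the largest that fits a single Thumb-1
"sub sp" immediate. */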
19662 if (amount <= n_free * 4)
19667 /* The bits which aren't usefully expanded as rtl. */
19669 thumb_unexpanded_epilogue (void)
19671 arm_stack_offsets *offsets;
19673 unsigned long live_regs_mask = 0;
19674 int high_regs_pushed = 0;
19676 int had_to_push_lr;
19679 if (cfun->machine->return_used_this_function != 0)
19682 if (IS_NAKED (arm_current_func_type ()))
19685 offsets = arm_get_frame_offsets ();
19686 live_regs_mask = offsets->saved_regs_mask;
19687 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19689 /* Where possible, deduce the registers used from the function's return
19690 value. This is more reliable than examining df_regs_ever_live_p () because that
19691 will be set if the register is ever used in the function, not just if
19692 the register is used to hold a return value. */
19693 size = arm_size_return_regs ();
19695 extra_pop = thumb1_extra_regs_pushed (offsets, false);
19698 unsigned long extra_mask = (1 << extra_pop) - 1;
19699 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
19702 /* The prolog may have pushed some high registers to use as
19703 work registers. e.g. the testsuite file:
19704 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19705 compiles to produce:
19706 push {r4, r5, r6, r7, lr}
19710 as part of the prolog. We have to undo that pushing here. */
19712 if (high_regs_pushed)
19714 unsigned long mask = live_regs_mask & 0xff;
19717 /* The available low registers depend on the size of the value we are
19718 returning. */
19725 /* Oh dear! We have no low registers into which we can pop
19726 high registers! */
19727 internal_error
19728 ("no low registers available for popping high registers");
19730 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19731 if (live_regs_mask & (1 << next_hi_reg))
19734 while (high_regs_pushed)
19736 /* Find lo register(s) into which the high register(s) can
19737 be popped. */
19738 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19740 if (mask & (1 << regno))
19741 high_regs_pushed--;
19742 if (high_regs_pushed == 0)
19746 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19748 /* Pop the values into the low register(s). */
19749 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19751 /* Move the value(s) into the high registers. */
19752 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19754 if (mask & (1 << regno))
19756 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19759 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19760 if (live_regs_mask & (1 << next_hi_reg))
19765 live_regs_mask &= ~0x0f00;
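/* Sketch of the sequence emitted above (illustrative; the actual
registers depend on the masks): restoring r8 and r9 through dead
low registers might come out as
pop {r4, r5}
mov r8, r4
mov r9, r5
*/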
19768 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19769 live_regs_mask &= 0xff;
19771 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19773 /* Pop the return address into the PC. */
19774 if (had_to_push_lr)
19775 live_regs_mask |= 1 << PC_REGNUM;
19777 /* Either no argument registers were pushed or a backtrace
19778 structure was created which includes an adjusted stack
19779 pointer, so just pop everything. */
19780 if (live_regs_mask)
19781 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19784 /* We have either just popped the return address into the
19785 PC or it was kept in LR for the entire function.
19786 Note that thumb_pushpop has already called thumb_exit if the
19787 PC was in the list. */
19788 if (!had_to_push_lr)
19789 thumb_exit (asm_out_file, LR_REGNUM);
19793 /* Pop everything but the return address. */
19794 if (live_regs_mask)
19795 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19798 if (had_to_push_lr)
19802 /* We have no free low regs, so save one. */
19803 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19807 /* Get the return address into a temporary register. */
19808 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19809 1 << LAST_ARG_REGNUM);
19813 /* Move the return address to lr. */
19814 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19816 /* Restore the low register. */
19817 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19822 regno = LAST_ARG_REGNUM;
19827 /* Remove the argument registers that were pushed onto the stack. */
19828 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19829 SP_REGNUM, SP_REGNUM,
19830 crtl->args.pretend_args_size);
19832 thumb_exit (asm_out_file, regno);
19838 /* Functions to save and restore machine-specific function data. */
19839 static struct machine_function *
19840 arm_init_machine_status (void)
19842 struct machine_function *machine;
19843 machine = ggc_alloc_cleared_machine_function ();
19845 #if ARM_FT_UNKNOWN != 0
19846 machine->func_type = ARM_FT_UNKNOWN;
19851 /* Return an RTX indicating where the return address to the
19852 calling function can be found. */
19854 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19859 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19862 /* Do anything needed before RTL is emitted for each function. */
19864 arm_init_expanders (void)
19866 /* Arrange to initialize and mark the machine per-function status. */
19867 init_machine_status = arm_init_machine_status;
19869 /* This is to stop the combine pass optimizing away the alignment
19870 adjustment of va_arg. */
19871 /* ??? It is claimed that this should not be necessary. */
19873 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19877 /* Like arm_compute_initial_elimination_offset. Simpler because there
19878 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19879 to point at the base of the local variables after static stack
19880 space for a function has been allocated. */
19883 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19885 arm_stack_offsets *offsets;
19887 offsets = arm_get_frame_offsets ();
19891 case ARG_POINTER_REGNUM:
19894 case STACK_POINTER_REGNUM:
19895 return offsets->outgoing_args - offsets->saved_args;
19897 case FRAME_POINTER_REGNUM:
19898 return offsets->soft_frame - offsets->saved_args;
19900 case ARM_HARD_FRAME_POINTER_REGNUM:
19901 return offsets->saved_regs - offsets->saved_args;
19903 case THUMB_HARD_FRAME_POINTER_REGNUM:
19904 return offsets->locals_base - offsets->saved_args;
19907 gcc_unreachable ();
19911 case FRAME_POINTER_REGNUM:
19914 case STACK_POINTER_REGNUM:
19915 return offsets->outgoing_args - offsets->soft_frame;
19917 case ARM_HARD_FRAME_POINTER_REGNUM:
19918 return offsets->saved_regs - offsets->soft_frame;
19920 case THUMB_HARD_FRAME_POINTER_REGNUM:
19921 return offsets->locals_base - offsets->soft_frame;
19924 gcc_unreachable ();
19929 gcc_unreachable ();
19933 /* Generate the rest of a function's prologue. */
19935 thumb1_expand_prologue (void)
19939 HOST_WIDE_INT amount;
19940 arm_stack_offsets *offsets;
19941 unsigned long func_type;
19943 unsigned long live_regs_mask;
19945 func_type = arm_current_func_type ();
19947 /* Naked functions don't have prologues. */
19948 if (IS_NAKED (func_type))
19951 if (IS_INTERRUPT (func_type))
19953 error ("interrupt Service Routines cannot be coded in Thumb mode");
19957 offsets = arm_get_frame_offsets ();
19958 live_regs_mask = offsets->saved_regs_mask;
19959 /* Load the pic register before setting the frame pointer,
19960 so we can use r7 as a temporary work register. */
19961 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19962 arm_load_pic_register (live_regs_mask);
19964 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19965 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19966 stack_pointer_rtx);
19968 amount = offsets->outgoing_args - offsets->saved_regs;
19969 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
19974 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19975 GEN_INT (- amount)));
19976 RTX_FRAME_RELATED_P (insn) = 1;
19982 /* The stack decrement is too big for an immediate value in a single
19983 insn. In theory we could issue multiple subtracts, but after
19984 three of them it becomes more space efficient to place the full
19985 value in the constant pool and load into a register. (Also the
19986 ARM debugger really likes to see only one stack decrement per
19987 function). So instead we look for a scratch register into which
19988 we can load the decrement, and then we subtract this from the
19989 stack pointer. Unfortunately on the thumb the only available
19990 scratch registers are the argument registers, and we cannot use
19991 these as they may hold arguments to the function. Instead we
19992 attempt to locate a call preserved register which is used by this
19993 function. If we can find one, then we know that it will have
19994 been pushed at the start of the prologue and so we can corrupt
19995 it needlessly. */
19996 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19997 if (live_regs_mask & (1 << regno))
20000 gcc_assert(regno <= LAST_LO_REGNUM);
20002 reg = gen_rtx_REG (SImode, regno);
20004 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20006 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20007 stack_pointer_rtx, reg));
20008 RTX_FRAME_RELATED_P (insn) = 1;
20009 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20010 plus_constant (stack_pointer_rtx,
20012 RTX_FRAME_RELATED_P (dwarf) = 1;
20013 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
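/* Illustrative output for this path (sketch; the scratch register
and the literal pool label depend on context):
ldr r4, .LCn @ .LCn holds -<amount>
add sp, sp, r4
*/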
20017 if (frame_pointer_needed)
20018 thumb_set_frame_pointer (offsets);
20020 /* If we are profiling, make sure no instructions are scheduled before
20021 the call to mcount. Similarly if the user has requested no
20022 scheduling in the prolog. Similarly if we want non-call exceptions
20023 using the EABI unwinder, to prevent faulting instructions from being
20024 swapped with a stack adjustment. */
20025 if (crtl->profile || !TARGET_SCHED_PROLOG
20026 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
20027 emit_insn (gen_blockage ());
20029 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20030 if (live_regs_mask & 0xff)
20031 cfun->machine->lr_save_eliminated = 0;
20036 thumb1_expand_epilogue (void)
20038 HOST_WIDE_INT amount;
20039 arm_stack_offsets *offsets;
20042 /* Naked functions don't have prologues. */
20043 if (IS_NAKED (arm_current_func_type ()))
20046 offsets = arm_get_frame_offsets ();
20047 amount = offsets->outgoing_args - offsets->saved_regs;
20049 if (frame_pointer_needed)
20051 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20052 amount = offsets->locals_base - offsets->saved_regs;
20054 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20056 gcc_assert (amount >= 0);
20060 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20061 GEN_INT (amount)));
20064 /* r3 is always free in the epilogue. */
20065 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20067 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20068 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20072 /* Emit a USE (stack_pointer_rtx), so that
20073 the stack adjustment will not be deleted. */
20074 emit_insn (gen_prologue_use (stack_pointer_rtx));
20076 if (crtl->profile || !TARGET_SCHED_PROLOG)
20077 emit_insn (gen_blockage ());
20079 /* Emit a clobber for each insn that will be restored in the epilogue,
20080 so that flow2 will get register lifetimes correct. */
20081 for (regno = 0; regno < 13; regno++)
20082 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20083 emit_clobber (gen_rtx_REG (SImode, regno));
20085 if (! df_regs_ever_live_p (LR_REGNUM))
20086 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20090 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20092 arm_stack_offsets *offsets;
20093 unsigned long live_regs_mask = 0;
20094 unsigned long l_mask;
20095 unsigned high_regs_pushed = 0;
20096 int cfa_offset = 0;
20099 if (IS_NAKED (arm_current_func_type ()))
20102 if (is_called_in_ARM_mode (current_function_decl))
20106 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20107 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20109 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20111 /* Generate code sequence to switch us into Thumb mode. */
20112 /* The .code 32 directive has already been emitted by
20113 ASM_DECLARE_FUNCTION_NAME. */
20114 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20115 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20117 /* Generate a label, so that the debugger will notice the
20118 change in instruction sets. This label is also used by
20119 the assembler to bypass the ARM code when this function
20120 is called from a Thumb encoded function elsewhere in the
20121 same file. Hence the definition of STUB_NAME here must
20122 agree with the definition in gas/config/tc-arm.c. */
20124 #define STUB_NAME ".real_start_of"
20126 fprintf (f, "\t.code\t16\n");
20128 if (arm_dllexport_name_p (name))
20129 name = arm_strip_name_encoding (name);
20131 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20132 fprintf (f, "\t.thumb_func\n");
20133 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
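/* Taken together, the entry sequence emitted above looks like this
(sketch for a function "foo"):
foo: @ ARM state
orr r12, pc, #1
bx r12
.code 16
.globl .real_start_of_foo
.thumb_func
.real_start_of_foo: @ Thumb state from here on
*/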
20136 if (crtl->args.pretend_args_size)
20138 /* Output unwind directive for the stack adjustment. */
20139 if (ARM_EABI_UNWIND_TABLES)
20140 fprintf (f, "\t.pad #%d\n",
20141 crtl->args.pretend_args_size);
20143 if (cfun->machine->uses_anonymous_args)
20147 fprintf (f, "\tpush\t{");
20149 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20151 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20152 regno <= LAST_ARG_REGNUM;
20154 asm_fprintf (f, "%r%s", regno,
20155 regno == LAST_ARG_REGNUM ? "" : ", ");
20157 fprintf (f, "}\n");
20160 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20161 SP_REGNUM, SP_REGNUM,
20162 crtl->args.pretend_args_size);
20164 /* We don't need to record the stores for unwinding (would it
20165 help the debugger any if we did?), but record the change in
20166 the stack pointer. */
20167 if (dwarf2out_do_frame ())
20169 char *l = dwarf2out_cfi_label (false);
20171 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20172 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20176 /* Get the registers we are going to push. */
20177 offsets = arm_get_frame_offsets ();
20178 live_regs_mask = offsets->saved_regs_mask;
20179 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20180 l_mask = live_regs_mask & 0x40ff;
20181 /* Then count how many other high registers will need to be pushed. */
20182 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20184 if (TARGET_BACKTRACE)
20187 unsigned work_register;
20189 /* We have been asked to create a stack backtrace structure.
20190 The code looks like this:
20194 0 sub SP, #16 Reserve space for 4 registers.
20195 2 push {R7} Push low registers.
20196 4 add R7, SP, #20 Get the stack pointer before the push.
20197 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20198 8 mov R7, PC Get hold of the start of this code plus 12.
20199 10 str R7, [SP, #16] Store it.
20200 12 mov R7, FP Get hold of the current frame pointer.
20201 14 str R7, [SP, #4] Store it.
20202 16 mov R7, LR Get hold of the current return address.
20203 18 str R7, [SP, #12] Store it.
20204 20 add R7, SP, #16 Point at the start of the backtrace structure.
20205 22 mov FP, R7 Put this value into the frame pointer. */
20207 work_register = thumb_find_work_register (live_regs_mask);
20209 if (ARM_EABI_UNWIND_TABLES)
20210 asm_fprintf (f, "\t.pad #16\n");
20213 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20214 SP_REGNUM, SP_REGNUM);
20216 if (dwarf2out_do_frame ())
20218 char *l = dwarf2out_cfi_label (false);
20220 cfa_offset = cfa_offset + 16;
20221 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20226 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20227 offset = bit_count (l_mask) * UNITS_PER_WORD;
20232 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20233 offset + 16 + crtl->args.pretend_args_size);
20235 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20238 /* Make sure that the instruction fetching the PC is in the right place
20239 to calculate "start of backtrace creation code + 12". */
20242 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20243 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20245 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20246 ARM_HARD_FRAME_POINTER_REGNUM);
20247 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20252 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20253 ARM_HARD_FRAME_POINTER_REGNUM);
20254 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20256 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20257 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20261 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20262 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20264 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20266 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20267 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20269 /* Optimization: If we are not pushing any low registers but we are going
20270 to push some high registers then delay our first push. This will just
20271 be a push of LR and we can combine it with the push of the first high
20272 register. */
20273 else if ((l_mask & 0xff) != 0
20274 || (high_regs_pushed == 0 && l_mask))
20276 unsigned long mask = l_mask;
20277 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20278 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20281 if (high_regs_pushed)
20283 unsigned pushable_regs;
20284 unsigned next_hi_reg;
20286 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20287 if (live_regs_mask & (1 << next_hi_reg))
20290 pushable_regs = l_mask & 0xff;
20292 if (pushable_regs == 0)
20293 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20295 while (high_regs_pushed > 0)
20297 unsigned long real_regs_mask = 0;
20299 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20301 if (pushable_regs & (1 << regno))
20303 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20305 high_regs_pushed --;
20306 real_regs_mask |= (1 << next_hi_reg);
20308 if (high_regs_pushed)
20310 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20312 if (live_regs_mask & (1 << next_hi_reg))
20317 pushable_regs &= ~((1 << regno) - 1);
20323 /* If we had to find a work register and we have not yet
20324 saved the LR then add it to the list of regs to push. */
20325 if (l_mask == (1 << LR_REGNUM))
20327 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20329 real_regs_mask | (1 << LR_REGNUM));
20333 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
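/* Sketch (illustrative): pushing r8 and r9 through free low
registers might be emitted as
mov r7, r9
mov r6, r8
push {r6, r7}
*/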
20338 /* Handle the case of a double word load into a low register from
20339 a computed memory address. The computed address may involve a
20340 register which is overwritten by the load. */
20342 thumb_load_double_from_address (rtx *operands)
20350 gcc_assert (GET_CODE (operands[0]) == REG);
20351 gcc_assert (GET_CODE (operands[1]) == MEM);
20353 /* Get the memory address. */
20354 addr = XEXP (operands[1], 0);
20356 /* Work out how the memory address is computed. */
20357 switch (GET_CODE (addr))
20360 operands[2] = adjust_address (operands[1], SImode, 4);
20362 if (REGNO (operands[0]) == REGNO (addr))
20364 output_asm_insn ("ldr\t%H0, %2", operands);
20365 output_asm_insn ("ldr\t%0, %1", operands);
20369 output_asm_insn ("ldr\t%0, %1", operands);
20370 output_asm_insn ("ldr\t%H0, %2", operands);
20375 /* Compute <address> + 4 for the high order load. */
20376 operands[2] = adjust_address (operands[1], SImode, 4);
20378 output_asm_insn ("ldr\t%0, %1", operands);
20379 output_asm_insn ("ldr\t%H0, %2", operands);
20383 arg1 = XEXP (addr, 0);
20384 arg2 = XEXP (addr, 1);
20386 if (CONSTANT_P (arg1))
20387 base = arg2, offset = arg1;
20389 base = arg1, offset = arg2;
20391 gcc_assert (GET_CODE (base) == REG);
20393 /* Catch the case of <address> = <reg> + <reg> */
20394 if (GET_CODE (offset) == REG)
20396 int reg_offset = REGNO (offset);
20397 int reg_base = REGNO (base);
20398 int reg_dest = REGNO (operands[0]);
20400 /* Add the base and offset registers together into the
20401 higher destination register. */
20402 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20403 reg_dest + 1, reg_base, reg_offset);
20405 /* Load the lower destination register from the address in
20406 the higher destination register. */
20407 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20408 reg_dest, reg_dest + 1);
20410 /* Load the higher destination register from its own address
20411 plus 4. */
20412 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20413 reg_dest + 1, reg_dest + 1);
20417 /* Compute <address> + 4 for the high order load. */
20418 operands[2] = adjust_address (operands[1], SImode, 4);
20420 /* If the computed address is held in the low order register
20421 then load the high order register first, otherwise always
20422 load the low order register first. */
20423 if (REGNO (operands[0]) == REGNO (base))
20425 output_asm_insn ("ldr\t%H0, %2", operands);
20426 output_asm_insn ("ldr\t%0, %1", operands);
20430 output_asm_insn ("ldr\t%0, %1", operands);
20431 output_asm_insn ("ldr\t%H0, %2", operands);
20437 /* With no registers to worry about we can just load the value
20438 directly. */
20439 operands[2] = adjust_address (operands[1], SImode, 4);
20441 output_asm_insn ("ldr\t%H0, %2", operands);
20442 output_asm_insn ("ldr\t%0, %1", operands);
20446 gcc_unreachable ();
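/* e.g. (illustrative): a doubleword load from [r0] into r0/r1 must
load the high word first, since the low destination overlaps the
base register:
ldr r1, [r0, #4]
ldr r0, [r0]
*/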
20453 thumb_output_move_mem_multiple (int n, rtx *operands)
20460 if (REGNO (operands[4]) > REGNO (operands[5]))
20463 operands[4] = operands[5];
20466 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
20467 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
20471 if (REGNO (operands[4]) > REGNO (operands[5]))
20474 operands[4] = operands[5];
20477 if (REGNO (operands[5]) > REGNO (operands[6]))
20480 operands[5] = operands[6];
20483 if (REGNO (operands[4]) > REGNO (operands[5]))
20486 operands[4] = operands[5];
20490 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20491 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20495 gcc_unreachable ();
20501 /* Output a call-via instruction for thumb state. */
20503 thumb_call_via_reg (rtx reg)
20505 int regno = REGNO (reg);
20508 gcc_assert (regno < LR_REGNUM);
20510 /* If we are in the normal text section we can use a single instance
20511 per compilation unit. If we are doing function sections, then we need
20512 an entry per section, since we can't rely on reachability. */
20513 if (in_section == text_section)
20515 thumb_call_reg_needed = 1;
20517 if (thumb_call_via_label[regno] == NULL)
20518 thumb_call_via_label[regno] = gen_label_rtx ();
20519 labelp = thumb_call_via_label + regno;
20523 if (cfun->machine->call_via[regno] == NULL)
20524 cfun->machine->call_via[regno] = gen_label_rtx ();
20525 labelp = cfun->machine->call_via + regno;
20528 output_asm_insn ("bl\t%a0", labelp);
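/* Net effect (sketch; the label name is illustrative): a call
through r3 becomes
bl .Lcall_via_r3
...
.Lcall_via_r3:
bx r3
with the bx stub emitted once per section (see arm_file_end). */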
20532 /* Routines for generating rtl. */
20534 thumb_expand_movmemqi (rtx *operands)
20536 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20537 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20538 HOST_WIDE_INT len = INTVAL (operands[2]);
20539 HOST_WIDE_INT offset = 0;
20543 emit_insn (gen_movmem12b (out, in, out, in));
20549 emit_insn (gen_movmem8b (out, in, out, in));
20555 rtx reg = gen_reg_rtx (SImode);
20556 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20557 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20564 rtx reg = gen_reg_rtx (HImode);
20565 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20566 plus_constant (in, offset))));
20567 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20575 rtx reg = gen_reg_rtx (QImode);
20576 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20577 plus_constant (in, offset))));
20578 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20584 thumb_reload_out_hi (rtx *operands)
20586 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20589 /* Handle reading a half-word from memory during reload. */
20591 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20593 gcc_unreachable ();
20596 /* Return the length of a function name prefix
20597 that starts with the character 'c'. */
20599 arm_get_strip_length (int c)
20603 ARM_NAME_ENCODING_LENGTHS
20608 /* Return a pointer to a function's name with any
20609 and all prefix encodings stripped from it. */
20611 arm_strip_name_encoding (const char *name)
20615 while ((skip = arm_get_strip_length (* name)))
20621 /* If there is a '*' anywhere in the name's prefix, then
20622 emit the stripped name verbatim, otherwise prepend an
20623 underscore if leading underscores are being used. */
20625 arm_asm_output_labelref (FILE *stream, const char *name)
20630 while ((skip = arm_get_strip_length (* name)))
20632 verbatim |= (*name == '*');
20637 fputs (name, stream);
20639 asm_fprintf (stream, "%U%s", name);
20643 arm_file_start (void)
20647 if (TARGET_UNIFIED_ASM)
20648 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20652 const char *fpu_name;
20653 if (arm_selected_arch)
20654 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
20656 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
20658 if (TARGET_SOFT_FLOAT)
20661 fpu_name = "softvfp";
20663 fpu_name = "softfpa";
20667 fpu_name = arm_fpu_desc->name;
20668 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20670 if (TARGET_HARD_FLOAT)
20671 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20672 if (TARGET_HARD_FLOAT_ABI)
20673 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20676 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20678 /* Some of these attributes only apply when the corresponding features
20679 are used. However we don't have any easy way of figuring this out.
20680 Conservatively record the setting that would have been used. */
20682 /* Tag_ABI_FP_rounding. */
20683 if (flag_rounding_math)
20684 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20685 if (!flag_unsafe_math_optimizations)
20687 /* Tag_ABI_FP_denormal. */
20688 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20689 /* Tag_ABI_FP_exceptions. */
20690 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20692 /* Tag_ABI_FP_user_exceptions. */
20693 if (flag_signaling_nans)
20694 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20695 /* Tag_ABI_FP_number_model. */
20696 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20697 flag_finite_math_only ? 1 : 3);
20699 /* Tag_ABI_align8_needed. */
20700 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20701 /* Tag_ABI_align8_preserved. */
20702 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20703 /* Tag_ABI_enum_size. */
20704 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20705 flag_short_enums ? 1 : 2);
20707 /* Tag_ABI_optimization_goals. */
20710 else if (optimize >= 2)
20716 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20718 /* Tag_ABI_FP_16bit_format. */
20719 if (arm_fp16_format)
20720 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20721 (int)arm_fp16_format);
20723 if (arm_lang_output_object_attributes_hook)
20724 arm_lang_output_object_attributes_hook();
20726 default_file_start();
20730 arm_file_end (void)
20734 if (NEED_INDICATE_EXEC_STACK)
20735 /* Add .note.GNU-stack. */
20736 file_end_indicate_exec_stack ();
20738 if (! thumb_call_reg_needed)
20741 switch_to_section (text_section);
20742 asm_fprintf (asm_out_file, "\t.code 16\n");
20743 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20745 for (regno = 0; regno < LR_REGNUM; regno++)
20747 rtx label = thumb_call_via_label[regno];
20751 targetm.asm_out.internal_label (asm_out_file, "L",
20752 CODE_LABEL_NUMBER (label));
20753 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20759 /* Symbols in the text segment can be accessed without indirecting via the
20760 constant pool; it may take an extra binary operation, but this is still
20761 faster than indirecting via memory. Don't do this when not optimizing,
20762 since we won't be calculating all of the offsets necessary to do this
20763 anyway. */
20766 arm_encode_section_info (tree decl, rtx rtl, int first)
20768 if (optimize > 0 && TREE_CONSTANT (decl))
20769 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20771 default_encode_section_info (decl, rtl, first);
20773 #endif /* !ARM_PE */
20776 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20778 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20779 && !strcmp (prefix, "L"))
20781 arm_ccfsm_state = 0;
20782 arm_target_insn = NULL;
20784 default_internal_label (stream, prefix, labelno);
20787 /* Output code to add DELTA to the first argument, and then jump
20788 to FUNCTION. Used for C++ multiple inheritance. */
20790 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20791 HOST_WIDE_INT delta,
20792 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20795 static int thunk_label = 0;
20798 int mi_delta = delta;
20799 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20801 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20802 ? 1 : 0);
20803 if (mi_delta < 0)
20804 mi_delta = - mi_delta;
20808 int labelno = thunk_label++;
20809 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20810 /* Thunks are entered in ARM mode when available. */
20811 if (TARGET_THUMB1_ONLY)
20813 /* push r3 so we can use it as a temporary. */
20814 /* TODO: Omit this save if r3 is not used. */
20815 fputs ("\tpush {r3}\n", file);
20816 fputs ("\tldr\tr3, ", file);
20820 fputs ("\tldr\tr12, ", file);
20822 assemble_name (file, label);
20823 fputc ('\n', file);
20826 /* If we are generating PIC, the ldr instruction below loads
20827 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20828 the address of the add + 8, so we have:
20830 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20833 Note that we have "+ 1" because some versions of GNU ld
20834 don't set the low bit of the result for R_ARM_REL32
20835 relocations against thumb function symbols.
20836 On ARMv6M this is +4, not +8. */
20837 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20838 assemble_name (file, labelpc);
20839 fputs (":\n", file);
20840 if (TARGET_THUMB1_ONLY)
20842 /* This is 2 insns after the start of the thunk, so we know it
20843 is 4-byte aligned. */
20844 fputs ("\tadd\tr3, pc, r3\n", file);
20845 fputs ("\tmov r12, r3\n", file);
20848 fputs ("\tadd\tr12, pc, r12\n", file);
20850 else if (TARGET_THUMB1_ONLY)
20851 fputs ("\tmov r12, r3\n", file);
20853 if (TARGET_THUMB1_ONLY)
20855 if (mi_delta > 255)
20857 fputs ("\tldr\tr3, ", file);
20858 assemble_name (file, label);
20859 fputs ("+4\n", file);
20860 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20861 mi_op, this_regno, this_regno);
20863 else if (mi_delta != 0)
20865 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20866 mi_op, this_regno, this_regno,
20872 /* TODO: Use movw/movt for large constants when available. */
20873 while (mi_delta != 0)
20875 if ((mi_delta & (3 << shift)) == 0)
20879 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20880 mi_op, this_regno, this_regno,
20881 mi_delta & (0xff << shift));
20882 mi_delta &= ~(0xff << shift);
20889 if (TARGET_THUMB1_ONLY)
20890 fputs ("\tpop\t{r3}\n", file);
20892 fprintf (file, "\tbx\tr12\n");
20893 ASM_OUTPUT_ALIGN (file, 2);
20894 assemble_name (file, label);
20895 fputs (":\n", file);
20898 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20899 rtx tem = XEXP (DECL_RTL (function), 0);
20900 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20901 tem = gen_rtx_MINUS (GET_MODE (tem),
20903 gen_rtx_SYMBOL_REF (Pmode,
20904 ggc_strdup (labelpc)));
20905 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20908 /* Output ".word .LTHUNKn". */
20909 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20911 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20912 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20916 fputs ("\tb\t", file);
20917 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20918 if (NEED_PLT_RELOC)
20919 fputs ("(PLT)", file);
20920 fputc ('\n', file);
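/* Illustrative non-PIC ARM thunk for delta == 4 (sketch; the "this"
register is r0 or r1 depending on the return convention):
add r0, r0, #4
b <function>(PLT)
*/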
20925 arm_emit_vector_const (FILE *file, rtx x)
20928 const char * pattern;
20930 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20932 switch (GET_MODE (x))
20934 case V2SImode: pattern = "%08x"; break;
20935 case V4HImode: pattern = "%04x"; break;
20936 case V8QImode: pattern = "%02x"; break;
20937 default: gcc_unreachable ();
20940 fprintf (file, "0x");
20941 for (i = CONST_VECTOR_NUNITS (x); i--;)
20945 element = CONST_VECTOR_ELT (x, i);
20946 fprintf (file, pattern, INTVAL (element));
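/* Worked example (illustrative): a V4HImode vector with elements
{1, 2, 3, 4} prints as 0x0004000300020001 -- elements are written
from the highest-numbered lane down. */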
20952 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20953 HFmode constant pool entries are actually loaded with ldr. */
20955 arm_emit_fp16_const (rtx c)
20960 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20961 bits = real_to_target (NULL, &r, HFmode);
20962 if (WORDS_BIG_ENDIAN)
20963 assemble_zeros (2);
20964 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20965 if (!WORDS_BIG_ENDIAN)
20966 assemble_zeros (2);
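/* Worked example (illustrative): HFmode 1.0 has the bit pattern
0x3c00, so on a little-endian target we emit the two-byte value
0x3c00 followed by two bytes of padding, filling the 4-byte slot
that ldr expects. */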
20970 arm_output_load_gr (rtx *operands)
20977 if (GET_CODE (operands [1]) != MEM
20978 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20979 || GET_CODE (reg = XEXP (sum, 0)) != REG
20980 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20981 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20982 return "wldrw%?\t%0, %1";
20984 /* Fix up an out-of-range load of a GR register. */
20985 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20986 wcgr = operands[0];
20988 output_asm_insn ("ldr%?\t%0, %1", operands);
20990 operands[0] = wcgr;
20992 output_asm_insn ("tmcr%?\t%0, %1", operands);
20993 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20998 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21000 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21001 named arg and all anonymous args onto the stack.
21002 XXX I know the prologue shouldn't be pushing registers, but it is faster
21003 that way. */
21006 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21007 enum machine_mode mode,
21010 int second_time ATTRIBUTE_UNUSED)
21014 cfun->machine->uses_anonymous_args = 1;
21015 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21017 nregs = pcum->aapcs_ncrn;
21018 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21022 nregs = pcum->nregs;
21024 if (nregs < NUM_ARG_REGS)
21025 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
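/* Worked example (illustrative): for "int f (int a, ...)" only one
argument register is named, so *pretend_size becomes
(NUM_ARG_REGS - 1) * UNITS_PER_WORD == 12 and the prologue will
push r1-r3. */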
21028 /* Return nonzero if the CONSUMER instruction (a store) does not need
21029 PRODUCER's value to calculate the address. */
21032 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21034 rtx value = PATTERN (producer);
21035 rtx addr = PATTERN (consumer);
21037 if (GET_CODE (value) == COND_EXEC)
21038 value = COND_EXEC_CODE (value);
21039 if (GET_CODE (value) == PARALLEL)
21040 value = XVECEXP (value, 0, 0);
21041 value = XEXP (value, 0);
21042 if (GET_CODE (addr) == COND_EXEC)
21043 addr = COND_EXEC_CODE (addr);
21044 if (GET_CODE (addr) == PARALLEL)
21045 addr = XVECEXP (addr, 0, 0);
21046 addr = XEXP (addr, 0);
21048 return !reg_overlap_mentioned_p (value, addr);
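/* e.g. (illustrative RTL): with producer (set (reg r0) ...) and
consumer (set (mem (plus (reg r1) (const_int 8))) (reg r0)),
r0 is only the stored value, not part of the address, so this
returns nonzero. */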
21051 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21052 have an early register shift value or amount dependency on the
21053 result of PRODUCER. */
21056 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21058 rtx value = PATTERN (producer);
21059 rtx op = PATTERN (consumer);
21062 if (GET_CODE (value) == COND_EXEC)
21063 value = COND_EXEC_CODE (value);
21064 if (GET_CODE (value) == PARALLEL)
21065 value = XVECEXP (value, 0, 0);
21066 value = XEXP (value, 0);
21067 if (GET_CODE (op) == COND_EXEC)
21068 op = COND_EXEC_CODE (op);
21069 if (GET_CODE (op) == PARALLEL)
21070 op = XVECEXP (op, 0, 0);
21073 early_op = XEXP (op, 0);
21074 /* This is either an actual independent shift, or a shift applied to
21075 the first operand of another operation. We want the whole shift
21076 operation. */
21077 if (GET_CODE (early_op) == REG)
21080 return !reg_overlap_mentioned_p (value, early_op);
21083 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21084 have an early register shift value dependency on the result of
21085 PRODUCER. */
21088 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21090 rtx value = PATTERN (producer);
21091 rtx op = PATTERN (consumer);
21094 if (GET_CODE (value) == COND_EXEC)
21095 value = COND_EXEC_CODE (value);
21096 if (GET_CODE (value) == PARALLEL)
21097 value = XVECEXP (value, 0, 0);
21098 value = XEXP (value, 0);
21099 if (GET_CODE (op) == COND_EXEC)
21100 op = COND_EXEC_CODE (op);
21101 if (GET_CODE (op) == PARALLEL)
21102 op = XVECEXP (op, 0, 0);
21105 early_op = XEXP (op, 0);
21107 /* This is either an actual independent shift, or a shift applied to
21108 the first operand of another operation. We want the value being
21109 shifted, in either case. */
21110 if (GET_CODE (early_op) != REG)
21111 early_op = XEXP (early_op, 0);
21113 return !reg_overlap_mentioned_p (value, early_op);
21116 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21117 have an early register mult dependency on the result of
21118 PRODUCER. */
21121 arm_no_early_mul_dep (rtx producer, rtx consumer)
21123 rtx value = PATTERN (producer);
21124 rtx op = PATTERN (consumer);
21126 if (GET_CODE (value) == COND_EXEC)
21127 value = COND_EXEC_CODE (value);
21128 if (GET_CODE (value) == PARALLEL)
21129 value = XVECEXP (value, 0, 0);
21130 value = XEXP (value, 0);
21131 if (GET_CODE (op) == COND_EXEC)
21132 op = COND_EXEC_CODE (op);
21133 if (GET_CODE (op) == PARALLEL)
21134 op = XVECEXP (op, 0, 0);
21137 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21139 if (GET_CODE (XEXP (op, 0)) == MULT)
21140 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21142 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21148 /* We can't rely on the caller doing the proper promotion when
21149 using APCS or ATPCS. */
21152 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21154 return !TARGET_AAPCS_BASED;
21157 static enum machine_mode
21158 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21159 enum machine_mode mode,
21160 int *punsignedp ATTRIBUTE_UNUSED,
21161 const_tree fntype ATTRIBUTE_UNUSED,
21162 int for_return ATTRIBUTE_UNUSED)
21164 if (GET_MODE_CLASS (mode) == MODE_INT
21165 && GET_MODE_SIZE (mode) < 4)
21171 /* AAPCS based ABIs use short enums by default. */
21174 arm_default_short_enums (void)
21176 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21180 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21183 arm_align_anon_bitfield (void)
21185 return TARGET_AAPCS_BASED;
21189 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21192 arm_cxx_guard_type (void)
21194 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21197 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21198 has an accumulator dependency on the result of the producer (a
21199 multiplication instruction) and no other dependency on that result. */
21201 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21203 rtx mul = PATTERN (producer);
21204 rtx mac = PATTERN (consumer);
21206 rtx mac_op0, mac_op1, mac_acc;
21208 if (GET_CODE (mul) == COND_EXEC)
21209 mul = COND_EXEC_CODE (mul);
21210 if (GET_CODE (mac) == COND_EXEC)
21211 mac = COND_EXEC_CODE (mac);
21213 /* Check that mul is of the form (set (...) (mult ...))
21214 and mla is of the form (set (...) (plus (mult ...) (...))). */
21215 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21216 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21217 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21220 mul_result = XEXP (mul, 0);
21221 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21222 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21223 mac_acc = XEXP (XEXP (mac, 1), 1);
21225 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21226 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21227 && !reg_overlap_mentioned_p (mul_result, mac_op1));
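/* e.g. (illustrative): for "mul r0, r1, r2" followed by
"mla r4, r5, r6, r0" the mul result feeds only the accumulator,
so this returns true; "mla r4, r0, r6, r7" would not qualify. */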
21231 /* The EABI says test the least significant bit of a guard variable. */
21234 arm_cxx_guard_mask_bit (void)
21236 return TARGET_AAPCS_BASED;
21240 /* The EABI specifies that all array cookies are 8 bytes long. */
21243 arm_get_cookie_size (tree type)
21247 if (!TARGET_AAPCS_BASED)
21248 return default_cxx_get_cookie_size (type);
21250 size = build_int_cst (sizetype, 8);
21255 /* The EABI says that array cookies should also contain the element size. */
21258 arm_cookie_has_size (void)
21260 return TARGET_AAPCS_BASED;
21264 /* The EABI says constructors and destructors should return a pointer to
21265 the object constructed/destroyed. */
21268 arm_cxx_cdtor_returns_this (void)
21270 return TARGET_AAPCS_BASED;
21273 /* The EABI says that an inline function may never be the key
21274 method. */
21277 arm_cxx_key_method_may_be_inline (void)
21279 return !TARGET_AAPCS_BASED;
21283 arm_cxx_determine_class_data_visibility (tree decl)
21285 if (!TARGET_AAPCS_BASED
21286 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21289 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21290 is exported. However, on systems without dynamic vague linkage,
21291 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21292 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21293 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21295 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21296 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21300 arm_cxx_class_data_always_comdat (void)
21302 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21303 vague linkage if the class has no key function. */
21304 return !TARGET_AAPCS_BASED;
21308 /* The EABI says __aeabi_atexit should be used to register static
21309 destructors. */
21312 arm_cxx_use_aeabi_atexit (void)
21314 return TARGET_AAPCS_BASED;
21319 arm_set_return_address (rtx source, rtx scratch)
21321 arm_stack_offsets *offsets;
21322 HOST_WIDE_INT delta;
21324 unsigned long saved_regs;
21326 offsets = arm_get_frame_offsets ();
21327 saved_regs = offsets->saved_regs_mask;
21329 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21330 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21333 if (frame_pointer_needed)
21334 addr = plus_constant(hard_frame_pointer_rtx, -4);
21337 /* LR will be the first saved register. */
21338 delta = offsets->outgoing_args - (offsets->frame + 4);
21343 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21344 GEN_INT (delta & ~4095)));
21349 addr = stack_pointer_rtx;
21351 addr = plus_constant (addr, delta);
21353 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21359 thumb_set_return_address (rtx source, rtx scratch)
21361 arm_stack_offsets *offsets;
21362 HOST_WIDE_INT delta;
21363 HOST_WIDE_INT limit;
21366 unsigned long mask;
21370 offsets = arm_get_frame_offsets ();
21371 mask = offsets->saved_regs_mask;
21372 if (mask & (1 << LR_REGNUM))
21375 /* Find the saved regs. */
21376 if (frame_pointer_needed)
21378 delta = offsets->soft_frame - offsets->saved_args;
21379 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
21385 delta = offsets->outgoing_args - offsets->saved_args;
21388 /* Allow for the stack frame. */
21389 if (TARGET_THUMB1 && TARGET_BACKTRACE)
21391 /* The link register is always the first saved register. */
21394 /* Construct the address. */
21395 addr = gen_rtx_REG (SImode, reg);
21398 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
21399 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
21403 addr = plus_constant (addr, delta);
21405 emit_move_insn (gen_frame_mem (Pmode, addr), source);
21408 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21411 /* Implements target hook vector_mode_supported_p. */
21413 arm_vector_mode_supported_p (enum machine_mode mode)
21415 /* Neon also supports V2SImode, etc. listed in the clause below. */
21416 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
21417 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
21420 if ((TARGET_NEON || TARGET_IWMMXT)
21421 && ((mode == V2SImode)
21422 || (mode == V4HImode)
21423 || (mode == V8QImode)))
21429 /* Implements target hook small_register_classes_for_mode_p. */
21431 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
21433 return TARGET_THUMB1;
21436 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
21437 ARM insns and therefore guarantee that the shift count is modulo 256.
21438 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
21439 guarantee no particular behavior for out-of-range counts. */
21441 static unsigned HOST_WIDE_INT
21442 arm_shift_truncation_mask (enum machine_mode mode)
21444 return mode == SImode ? 255 : 0;
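/* e.g. (illustrative): for SImode this licenses folding
"x << (n & 255)" into "x << n", since the hardware masks the
count itself; for DImode no such folding is permitted. */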
21448 /* Map internal gcc register numbers to DWARF2 register numbers. */
21451 arm_dbx_register_number (unsigned int regno)
21456 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
21457 compatibility. The EABI defines them as registers 96-103. */
21458 if (IS_FPA_REGNUM (regno))
21459 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
21461 if (IS_VFP_REGNUM (regno))
21463 /* See comment in arm_dwarf_register_span. */
21464 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21465 return 64 + regno - FIRST_VFP_REGNUM;
21467 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
21470 if (IS_IWMMXT_GR_REGNUM (regno))
21471 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
21473 if (IS_IWMMXT_REGNUM (regno))
21474 return 112 + regno - FIRST_IWMMXT_REGNUM;
21476 gcc_unreachable ();
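/* Worked examples (illustrative, per the formulas above): s5 maps
to DWARF register 69 (64 + 5) and the register pair forming d8
maps to 264 (256 + 8). */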
21479 /* Dwarf models VFPv3 registers as 32 64-bit registers.
21480 GCC models them as 64 32-bit registers, so we need to describe this to
21481 the DWARF generation code. Other registers can use the default. */
21483 arm_dwarf_register_span (rtx rtl)
21490 regno = REGNO (rtl);
21491 if (!IS_VFP_REGNUM (regno))
21494 /* XXX FIXME: The EABI defines two VFP register ranges:
21495 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
21496 256-287: D0-D31
21497 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
21498 corresponding D register. Until GDB supports this, we shall use the
21499 legacy encodings. We also use these encodings for D0-D15 for
21500 compatibility with older debuggers. */
21501 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21504 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21505 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21506 regno = (regno - FIRST_VFP_REGNUM) / 2;
21507 for (i = 0; i < nregs; i++)
21508 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
21513 #ifdef TARGET_UNWIND_INFO
21514 /* Emit unwind directives for a store-multiple instruction or stack pointer
21515 push during alignment.
21516 These should only ever be generated by the function prologue code, so
21517 expect them to have a particular form. */
21520 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21523 HOST_WIDE_INT offset;
21524 HOST_WIDE_INT nregs;
21530 e = XVECEXP (p, 0, 0);
21531 if (GET_CODE (e) != SET)
21534 /* First insn will adjust the stack pointer. */
21535 if (GET_CODE (e) != SET
21536 || GET_CODE (XEXP (e, 0)) != REG
21537 || REGNO (XEXP (e, 0)) != SP_REGNUM
21538 || GET_CODE (XEXP (e, 1)) != PLUS)
21541 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21542 nregs = XVECLEN (p, 0) - 1;
21544 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21547 /* The function prologue may also push pc, but not annotate it as it is
21548 never restored. We turn this into a stack pointer adjustment. */
21549 if (nregs * 4 == offset - 4)
21551 fprintf (asm_out_file, "\t.pad #4\n");
21555 fprintf (asm_out_file, "\t.save {");
21557 else if (IS_VFP_REGNUM (reg))
21560 fprintf (asm_out_file, "\t.vsave {");
21562 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21564 /* FPA registers are done differently. */
21565 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21569 /* Unknown register type. */
21572 /* If the stack increment doesn't match the size of the saved registers,
21573 something has gone horribly wrong. */
21574 if (offset != nregs * reg_size)
21579 /* The remaining insns will describe the stores. */
21580 for (i = 1; i <= nregs; i++)
21582 /* Expect (set (mem <addr>) (reg)).
21583 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21584 e = XVECEXP (p, 0, i);
21585 if (GET_CODE (e) != SET
21586 || GET_CODE (XEXP (e, 0)) != MEM
21587 || GET_CODE (XEXP (e, 1)) != REG)
21590 reg = REGNO (XEXP (e, 1));
21595 fprintf (asm_out_file, ", ");
21596 /* We can't use %r for vfp because we need to use the
21597 double precision register names. */
21598 if (IS_VFP_REGNUM (reg))
21599 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21601 asm_fprintf (asm_out_file, "%r", reg);
21603 #ifdef ENABLE_CHECKING
21604 /* Check that the addresses are consecutive. */
21605 e = XEXP (XEXP (e, 0), 0);
21606 if (GET_CODE (e) == PLUS)
21608 offset += reg_size;
21609 if (GET_CODE (XEXP (e, 0)) != REG
21610 || REGNO (XEXP (e, 0)) != SP_REGNUM
21611 || GET_CODE (XEXP (e, 1)) != CONST_INT
21612 || offset != INTVAL (XEXP (e, 1)))
21616 || GET_CODE (e) != REG
21617 || REGNO (e) != SP_REGNUM)
21621 fprintf (asm_out_file, "}\n");
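/* e.g. (illustrative): a prologue "push {r4, r5, lr}" is annotated
.save {r4, r5, lr}
and a push that also stores pc additionally gets ".pad #4" for
the slot that is never restored. */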
21624 /* Emit unwind directives for a SET. */
21627 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21635 switch (GET_CODE (e0))
21638 /* Pushing a single register. */
21639 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21640 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21641 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21644 asm_fprintf (asm_out_file, "\t.save ");
21645 if (IS_VFP_REGNUM (REGNO (e1)))
21646 asm_fprintf(asm_out_file, "{d%d}\n",
21647 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21649 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21653 if (REGNO (e0) == SP_REGNUM)
21655 /* A stack increment. */
21656 if (GET_CODE (e1) != PLUS
21657 || GET_CODE (XEXP (e1, 0)) != REG
21658 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21659 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21662 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21663 -INTVAL (XEXP (e1, 1)));
21665 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21667 HOST_WIDE_INT offset;
21669 if (GET_CODE (e1) == PLUS)
21671 if (GET_CODE (XEXP (e1, 0)) != REG
21672 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21674 reg = REGNO (XEXP (e1, 0));
21675 offset = INTVAL (XEXP (e1, 1));
21676 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21677 HARD_FRAME_POINTER_REGNUM, reg,
21680 else if (GET_CODE (e1) == REG)
21683 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21684 HARD_FRAME_POINTER_REGNUM, reg);
21689 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21691 /* Move from sp to reg. */
21692 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21694 else if (GET_CODE (e1) == PLUS
21695 && GET_CODE (XEXP (e1, 0)) == REG
21696 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21697 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21699 /* Set reg to offset from sp. */
21700 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21701 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21703 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21705 /* Stack pointer save before alignment. */
21707 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21720 /* Emit unwind directives for the given insn. */
21723 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21727 if (!ARM_EABI_UNWIND_TABLES)
21730 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21731 && (TREE_NOTHROW (current_function_decl)
21732 || crtl->all_throwers_are_sibcalls))
21735 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21738 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21740 pat = XEXP (pat, 0);
21742 pat = PATTERN (insn);
21744 switch (GET_CODE (pat))
21747 arm_unwind_emit_set (asm_out_file, pat);
21751 /* Store multiple. */
21752 arm_unwind_emit_sequence (asm_out_file, pat);
21761 /* Output a reference from a function exception table to the type_info
21762 object X. The EABI specifies that the symbol should be relocated by
21763 an R_ARM_TARGET2 relocation. */
21766 arm_output_ttype (rtx x)
21768 fputs ("\t.word\t", asm_out_file);
21769 output_addr_const (asm_out_file, x);
21770 /* Use special relocations for symbol references. */
21771 if (GET_CODE (x) != CONST_INT)
21772 fputs ("(TARGET2)", asm_out_file);
21773 fputc ('\n', asm_out_file);
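/* e.g. (illustrative): a catch clause for "int" would emit
.word _ZTIi(TARGET2)
*/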
21777 #endif /* TARGET_UNWIND_INFO */
21780 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21781 stack alignment. */
21784 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21786 rtx unspec = SET_SRC (pattern);
21787 gcc_assert (GET_CODE (unspec) == UNSPEC);
21791 case UNSPEC_STACK_ALIGN:
21792 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21793 put anything on the stack, so hopefully it won't matter.
21794 CFA = SP will be correct after alignment. */
21795 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21796 SET_DEST (pattern));
21799 gcc_unreachable ();
21804 /* Output unwind directives for the start/end of a function. */
21807 arm_output_fn_unwind (FILE * f, bool prologue)
21809 if (!ARM_EABI_UNWIND_TABLES)
21813 fputs ("\t.fnstart\n", f);
21816 /* If this function will never be unwound, then mark it as such.
21817 The same condition is used in arm_unwind_emit to suppress
21818 the frame annotations. */
21819 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21820 && (TREE_NOTHROW (current_function_decl)
21821 || crtl->all_throwers_are_sibcalls))
21822 fputs("\t.cantunwind\n", f);
21824 fputs ("\t.fnend\n", f);
21829 arm_emit_tls_decoration (FILE *fp, rtx x)
21831 enum tls_reloc reloc;
21834 val = XVECEXP (x, 0, 0);
21835 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21837 output_addr_const (fp, val);
21842 fputs ("(tlsgd)", fp);
21845 fputs ("(tlsldm)", fp);
21848 fputs ("(tlsldo)", fp);
21851 fputs ("(gottpoff)", fp);
21854 fputs ("(tpoff)", fp);
21857 gcc_unreachable ();
21865 fputs (" + (. - ", fp);
21866 output_addr_const (fp, XVECEXP (x, 0, 2));
21868 output_addr_const (fp, XVECEXP (x, 0, 3));
21878 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21881 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21883 gcc_assert (size == 4);
21884 fputs ("\t.word\t", file);
21885 output_addr_const (file, x);
21886 fputs ("(tlsldo)", file);
21890 arm_output_addr_const_extra (FILE *fp, rtx x)
21892 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21893 return arm_emit_tls_decoration (fp, x);
21894 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21897 int labelno = INTVAL (XVECEXP (x, 0, 0));
21899 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21900 assemble_name_raw (fp, label);
21904 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21906 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21910 output_addr_const (fp, XVECEXP (x, 0, 0));
21914 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21916 output_addr_const (fp, XVECEXP (x, 0, 0));
21920 output_addr_const (fp, XVECEXP (x, 0, 1));
21924 else if (GET_CODE (x) == CONST_VECTOR)
21925 return arm_emit_vector_const (fp, x);
21930 /* Output assembly for a shift instruction.
21931 SET_FLAGS determines how the instruction modifies the condition codes.
21932 0 - Do not set condition codes.
21933 1 - Set condition codes.
21934 2 - Use smallest instruction. */
21936 arm_output_shift(rtx * operands, int set_flags)
21939 static const char flag_chars[3] = {'?', '.', '!'};
21944 c = flag_chars[set_flags];
21945 if (TARGET_UNIFIED_ASM)
21947 shift = shift_op(operands[3], &val);
21951 operands[2] = GEN_INT(val);
21952 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21955 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21958 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21959 output_asm_insn (pattern, operands);
21963 /* Output a Thumb-1 casesi dispatch sequence. */
21965 thumb1_output_casesi (rtx *operands)
21967 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21969 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21971 switch (GET_MODE(diff_vec))
21974 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21975 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21977 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21978 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21980 return "bl\t%___gnu_thumb1_case_si";
21982 gcc_unreachable ();
/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
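/* A hedged sketch of the QImode form, with example operands filled in:

	cmp	r0, #9		@ compare index against table bound
	bhi	.Ldefault	@ out of range: take the default label
	tbb	[pc, r0]	@ branch via the byte offset table

   TBB/TBH address their table relative to the pc, so the table must
   immediately follow the branch; that is why the QImode and HImode
   cases need no explicit address calculation.  */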
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;
static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
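/* Worked example (the declaration below is hypothetical user code):

	#include <arm_neon.h>
	void f (int8x8_t);

   int8x8_t has mode V8QImode and element type __builtin_neon_qi, so it
   matches the first row of arm_mangle_map and mangles as
   "15__simd64_int8_t" (the "15" is the Itanium C++ ABI length prefix),
   giving "_Z1f15__simd64_int8_t" as the function's mangled name.  */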
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;

  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
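/* Effect of the Thumb order above: local allocation hands out the
   argument registers r3..r0 first, then r4..r7, and only then lr and
   ip ahead of the remaining high registers, since most Thumb
   instructions can only encode the low registers.  */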
/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}
/* Only Thumb-1 lacks conditional execution, so return true if the
   target is not Thumb-1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
#include "gt-arm.h"