/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);

static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif

static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
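
/* Illustrative usage (hypothetical user code, not part of this file):
   these attributes are attached in the usual GNU way, e.g.

     void far_func (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   and such uses are validated by the arm_handle_* functions named in
   the table above.  */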
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
570 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
571 #define FL_ARCH3M (1 << 1) /* Extended multiply */
572 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
573 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
574 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
575 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
576 #define FL_THUMB (1 << 6) /* Thumb aware */
577 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
578 #define FL_STRONG (1 << 8) /* StrongARM */
579 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
580 #define FL_XSCALE (1 << 10) /* XScale */
581 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
582 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
583 media instructions. */
584 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
585 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
586 Note: ARM6 & 7 derivatives only. */
587 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
588 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE       (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                       | FL_CO_PROC)
#define FL_FOR_ARCH2       FL_NOTM
#define FL_FOR_ARCH3       (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M      (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4       (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T      (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5       (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T      (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E      (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE     (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ    FL_FOR_ARCH5TE
#define FL_FOR_ARCH6       (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J      FL_FOR_ARCH6
#define FL_FOR_ARCH6K      (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z      FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK     FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2     (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M      (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7       (FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A      (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R      (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M      (FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM     (FL_FOR_ARCH7M | FL_ARCH7EM)
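
/* As a worked example, these masks accumulate down the list:
     FL_FOR_ARCH5TE = FL_FOR_ARCH5E | FL_THUMB
                    = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                      | FL_ARCH5 | FL_ARCH5E | FL_THUMB
   so a single bit test against insn_flags (below) answers whether a
   given capability is available.  */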
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */
/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;
/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
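
/* For illustration: the 0xff base limits the candidates to the low
   registers r0-r7; the mask then removes the Thumb hard frame pointer
   (conventionally r7) and, defensively, SP, PC and the PIC register,
   whose bits lie outside 0xff on most configurations anyway.  */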
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
  {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;

/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */
static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
  {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
  {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
  {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
  {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
  {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
  {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
  {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
  {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
  {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, false},
  {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true, true},
  {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
  {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
  {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
  /* Compatibility aliases.  */
  {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */
static const struct float_abi all_float_abis[] =
{
  {"soft", ARM_FLOAT_ABI_SOFT},
  {"softfp", ARM_FLOAT_ABI_SOFTFP},
  {"hard", ARM_FLOAT_ABI_HARD}
};
struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */
static const struct fp16_format all_fp16_formats[] =
{
  {"none", ARM_FP16_FORMAT_NONE},
  {"ieee", ARM_FP16_FORMAT_IEEE},
  {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
};
struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */
static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu", ARM_ABI_APCS},
  {"atpcs", ARM_ABI_ATPCS},
  {"aapcs", ARM_ABI_AAPCS},
  {"iwmmxt", ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
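
  /* As an illustration (hypothetical user code, not from this file):
     "q = a / b" on SImode becomes a call to __aeabi_idiv, while
     "r = a % b" calls __aeabi_idivmod and takes the remainder from r1,
     the second word of the returned {quotient, remainder} pair.  */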
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));
      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();
  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;
  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */
static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    {
      if (streq (name, sel->name))
        return sel;
    }

  error ("bad value (%s) for %s switch", name, desc);
  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu (arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu (arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu (arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  unsigned int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;

  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors *sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified any command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }
      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors *best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }
  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
        {
          if (streq (all_fp16_formats[i].name, target_fp16_format_name))
            {
              arm_fp16_format = all_fp16_formats[i].fp16_format_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_fp16_formats))
        error ("invalid __fp16 format option: -mfp16-format=%s",
               target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;
  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");
  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }
  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");
  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
1645 if (target_fpu_name == NULL && target_fpe_name != NULL)
1647 if (streq (target_fpe_name, "2"))
1648 target_fpu_name = "fpe2";
1649 else if (streq (target_fpe_name, "3"))
1650 target_fpu_name = "fpe3";
1652 error ("invalid floating point emulation option: -mfpe=%s",
  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
    }
  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
        {
          arm_fpu_desc = &all_fpus[i];
          break;
        }
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");
  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }
  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
1756 if (TARGET_AAPCS_BASED)
1758 if (arm_abi == ARM_ABI_IWMMXT)
1759 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1760 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1761 && TARGET_HARD_FLOAT
1763 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1765 arm_pcs_default = ARM_PCS_AAPCS;
1769 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1770 sorry ("-mfloat-abi=hard and VFP");
1772 if (arm_abi == ARM_ABI_APCS)
1773 arm_pcs_default = ARM_PCS_APCS;
1775 arm_pcs_default = ARM_PCS_ATPCS;
1778 /* For arm2/3 there is no need to do any scheduling if there is only
1779 a floating point emulator, or we are doing software floating-point. */
1780 if ((TARGET_SOFT_FLOAT
1781 || (TARGET_FPA && arm_fpu_desc->rev))
1782 && (tune_flags & FL_MODE32) == 0)
1783 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1785 if (target_thread_switch)
1787 if (strcmp (target_thread_switch, "soft") == 0)
1788 target_thread_pointer = TP_SOFT;
1789 else if (strcmp (target_thread_switch, "auto") == 0)
1790 target_thread_pointer = TP_AUTO;
1791 else if (strcmp (target_thread_switch, "cp15") == 0)
1792 target_thread_pointer = TP_CP15;
1794 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1797 /* Use the cp15 method if it is available. */
1798 if (target_thread_pointer == TP_AUTO)
1800 if (arm_arch6k && !TARGET_THUMB1)
1801 target_thread_pointer = TP_CP15;
1803 target_thread_pointer = TP_SOFT;
1806 if (TARGET_HARD_TP && TARGET_THUMB1)
1807 error ("can not use -mtp=cp15 with 16-bit Thumb");
1809 /* Override the default structure alignment for AAPCS ABI. */
1810 if (TARGET_AAPCS_BASED)
1811 arm_structure_size_boundary = 8;
1813 if (structure_size_string != NULL)
1815 int size = strtol (structure_size_string, NULL, 0);
1817 if (size == 8 || size == 32
1818 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1819 arm_structure_size_boundary = size;
1821 warning (0, "structure size boundary can only be set to %s",
1822 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64" : "8 or 32");
1825 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1827 error ("RTP PIC is incompatible with Thumb");
1831 /* If stack checking is disabled, we can use r10 as the PIC register,
1832 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1833 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1835 if (TARGET_VXWORKS_RTP)
1836 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1837 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1840 if (flag_pic && TARGET_VXWORKS_RTP)
1841 arm_pic_register = 9;
1843 if (arm_pic_register_string != NULL)
1845 int pic_register = decode_reg_name (arm_pic_register_string);
1848 warning (0, "-mpic-register= is useless without -fpic");
1850 /* Prevent the user from choosing an obviously stupid PIC register. */
1851 else if (pic_register < 0 || call_used_regs[pic_register]
1852 || pic_register == HARD_FRAME_POINTER_REGNUM
1853 || pic_register == STACK_POINTER_REGNUM
1854 || pic_register >= PC_REGNUM
1855 || (TARGET_VXWORKS_RTP
1856 && (unsigned int) pic_register != arm_pic_register))
1857 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1859 arm_pic_register = pic_register;
1862 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1863 if (fix_cm3_ldrd == 2)
1865 if (arm_selected_cpu->core == cortexm3)
1871 if (TARGET_THUMB1 && flag_schedule_insns)
1873 /* Don't warn since it's on by default in -O2. */
1874 flag_schedule_insns = 0;
1879 /* If optimizing for size, bump the number of instructions that we
1880 are prepared to conditionally execute (even on a StrongARM). */
1881 max_insns_skipped = 6;
1885 /* StrongARM has early execution of branches, so a sequence
1886 that is worth skipping is shorter. */
1887 if (arm_tune_strongarm)
1888 max_insns_skipped = 3;
1891 /* Hot/Cold partitioning is not currently supported, since we can't
1892 handle literal pool placement in that case. */
1893 if (flag_reorder_blocks_and_partition)
1895 inform (input_location,
1896 "-freorder-blocks-and-partition not supported on this architecture");
1897 flag_reorder_blocks_and_partition = 0;
1898 flag_reorder_blocks = 1;
1901 if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST)
1903 /* Hoisting PIC address calculations more aggressively provides a small,
1904 but measurable, size reduction for PIC code. Therefore, we decrease
1905 the bar for unrestricted expression hoisting to the cost of PIC address
1906 calculation, which is 2 instructions. */
1907 set_param_value ("gcse-unrestricted-cost", 2);
1909 /* Register global variables with the garbage collector. */
1910 arm_add_gc_roots ();
1914 arm_add_gc_roots (void)
1916 gcc_obstack_init (&minipool_obstack);
1917 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1920 /* A table of known ARM exception types.
1921 For use with the interrupt function attribute. */
1925 const char *const arg;
1926 const unsigned long return_value;
1930 static const isr_attribute_arg isr_attribute_args [] =
1932 { "IRQ", ARM_FT_ISR },
1933 { "irq", ARM_FT_ISR },
1934 { "FIQ", ARM_FT_FIQ },
1935 { "fiq", ARM_FT_FIQ },
1936 { "ABORT", ARM_FT_ISR },
1937 { "abort", ARM_FT_ISR },
1938 { "ABORT", ARM_FT_ISR },
1939 { "abort", ARM_FT_ISR },
1940 { "UNDEF", ARM_FT_EXCEPTION },
1941 { "undef", ARM_FT_EXCEPTION },
1942 { "SWI", ARM_FT_EXCEPTION },
1943 { "swi", ARM_FT_EXCEPTION },
1944 { NULL, ARM_FT_NORMAL }
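  /* For instance, a handler declared as
	void handler (void) __attribute__ ((interrupt ("IRQ")));
     is looked up in this table and classified as ARM_FT_ISR.  */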
1947 /* Returns the (interrupt) function type of the current
1948 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1950 static unsigned long
1951 arm_isr_value (tree argument)
1953 const isr_attribute_arg * ptr;
1957 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1959 /* No argument - default to IRQ. */
1960 if (argument == NULL_TREE)
1963 /* Get the value of the argument. */
1964 if (TREE_VALUE (argument) == NULL_TREE
1965 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1966 return ARM_FT_UNKNOWN;
1968 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1970 /* Check it against the list of known arguments. */
1971 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1972 if (streq (arg, ptr->arg))
1973 return ptr->return_value;
1975 /* An unrecognized interrupt type. */
1976 return ARM_FT_UNKNOWN;
1979 /* Computes the type of the current function. */
1981 static unsigned long
1982 arm_compute_func_type (void)
1984 unsigned long type = ARM_FT_UNKNOWN;
1988 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1990 /* Decide if the current function is volatile. Such functions
1991 never return, and many memory cycles can be saved by not storing
1992 register values that will never be needed again. This optimization
1993 was added to speed up context switching in a kernel application. */
1995 && (TREE_NOTHROW (current_function_decl)
1996 || !(flag_unwind_tables
1997 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1998 && TREE_THIS_VOLATILE (current_function_decl))
1999 type |= ARM_FT_VOLATILE;
2001 if (cfun->static_chain_decl != NULL)
2002 type |= ARM_FT_NESTED;
2004 attr = DECL_ATTRIBUTES (current_function_decl);
2006 a = lookup_attribute ("naked", attr);
2008 type |= ARM_FT_NAKED;
2010 a = lookup_attribute ("isr", attr);
2012 a = lookup_attribute ("interrupt", attr);
2015 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2017 type |= arm_isr_value (TREE_VALUE (a));
2022 /* Returns the type of the current function. */
2025 arm_current_func_type (void)
2027 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2028 cfun->machine->func_type = arm_compute_func_type ();
2030 return cfun->machine->func_type;
2034 arm_allocate_stack_slots_for_args (void)
2036 /* Naked functions should not allocate stack slots for arguments. */
2037 return !IS_NAKED (arm_current_func_type ());
2041 /* Output assembler code for a block containing the constant parts
2042 of a trampoline, leaving space for the variable parts.
2044 On the ARM, (if r8 is the static chain regnum, and remembering that
2045 referencing pc adds an offset of 8) the trampoline looks like:
2048 .word static chain value
2049 .word function's address
2050 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
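/* Concretely: the first ldr executes at some address A, where pc
   reads as A + 8, so "[pc, #0]" fetches the static chain word at
   A + 8; the second ldr, at A + 4, likewise fetches the code address
   at A + 12 and loads it directly into pc.  */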
2053 arm_asm_trampoline_template (FILE *f)
2057 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2058 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2060 else if (TARGET_THUMB2)
2062 /* The Thumb-2 trampoline is similar to the ARM implementation.
2063 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2064 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2065 STATIC_CHAIN_REGNUM, PC_REGNUM);
2066 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2070 ASM_OUTPUT_ALIGN (f, 2);
2071 fprintf (f, "\t.code\t16\n");
2072 fprintf (f, ".Ltrampoline_start:\n");
2073 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2074 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2075 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2076 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2077 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2078 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2080 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2081 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2084 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2087 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2089 rtx fnaddr, mem, a_tramp;
2091 emit_block_move (m_tramp, assemble_trampoline_template (),
2092 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2094 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2095 emit_move_insn (mem, chain_value);
2097 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2098 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2099 emit_move_insn (mem, fnaddr);
2101 a_tramp = XEXP (m_tramp, 0);
2102 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2103 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2104 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2107 /* Thumb trampolines should be entered in Thumb mode, so set
2108 the bottom bit of the address. */
2111 arm_trampoline_adjust_address (rtx addr)
2114 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2115 NULL, 0, OPTAB_LIB_WIDEN);
2119 /* Return 1 if it is possible to return using a single instruction.
2120 If SIBLING is non-null, this is a test for a return before a sibling
2121 call. SIBLING is the call insn, so we can examine its register usage. */
2124 use_return_insn (int iscond, rtx sibling)
2127 unsigned int func_type;
2128 unsigned long saved_int_regs;
2129 unsigned HOST_WIDE_INT stack_adjust;
2130 arm_stack_offsets *offsets;
2132 /* Never use a return instruction before reload has run. */
2133 if (!reload_completed)
2136 func_type = arm_current_func_type ();
2138 /* Naked, volatile and stack alignment functions need special
2140 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2143 /* So do interrupt functions that use the frame pointer and Thumb
2144 interrupt functions. */
2145 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2148 offsets = arm_get_frame_offsets ();
2149 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2151 /* As do variadic functions. */
2152 if (crtl->args.pretend_args_size
2153 || cfun->machine->uses_anonymous_args
2154 /* Or if the function calls __builtin_eh_return () */
2155 || crtl->calls_eh_return
2156 /* Or if the function calls alloca */
2157 || cfun->calls_alloca
2158 /* Or if there is a stack adjustment. However, if the stack pointer
2159 is saved on the stack, we can use a pre-incrementing stack load. */
2160 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2161 && stack_adjust == 4)))
2164 saved_int_regs = offsets->saved_regs_mask;
2166 /* Unfortunately, the insn
2168 ldmib sp, {..., sp, ...}
2170 triggers a bug on most SA-110 based devices, such that the stack
2171 pointer won't be correctly restored if the instruction takes a
2172 page fault. We work around this problem by popping r3 along with
2173 the other registers, since that is never slower than executing
2174 another instruction.
2176 We test for !arm_arch5 here, because code for any architecture
2177 less than this could potentially be run on one of the buggy
2179 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2181 /* Validate that r3 is a call-clobbered register (always true in
2182 the default ABI) ... */
2183 if (!call_used_regs[3])
2186 /* ... that it isn't being used for a return value ... */
2187 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2190 /* ... or for a tail-call argument ... */
2193 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2195 if (find_regno_fusage (sibling, USE, 3))
2199 /* ... and that there are no call-saved registers in r0-r2
2200 (always true in the default ABI). */
2201 if (saved_int_regs & 0x7)
2205 /* Can't be done if interworking with Thumb, and any registers have been
2207 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2210 /* On StrongARM, conditional returns are expensive if they aren't
2211 taken and multiple registers have been stacked. */
2212 if (iscond && arm_tune_strongarm)
2214 /* Conditional return when just the LR is stored is a simple
2215 conditional-load instruction; that's not expensive. */
2216 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2220 && arm_pic_register != INVALID_REGNUM
2221 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2225 /* If there are saved registers but the LR isn't saved, then we need
2226 two instructions for the return. */
2227 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2230 /* Can't be done if any of the FPA regs are pushed,
2231 since this also requires an insn. */
2232 if (TARGET_HARD_FLOAT && TARGET_FPA)
2233 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2234 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2237 /* Likewise VFP regs. */
2238 if (TARGET_HARD_FLOAT && TARGET_VFP)
2239 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2240 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2243 if (TARGET_REALLY_IWMMXT)
2244 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2245 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2251 /* Return TRUE if int I is a valid immediate ARM constant. */
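/* Such a constant is an 8-bit value rotated right by an even number
   of bits, so, for example, 0xff, 0xff00, 0xff000000 (0xff rotated
   right by 8) and 0xf000000f (0xff rotated right by 4) are all
   representable, while 0x101 and 0xffff are not.  */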
2254 const_ok_for_arm (HOST_WIDE_INT i)
2258 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2259 be all zero, or all one. */
2260 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2261 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2262 != ((~(unsigned HOST_WIDE_INT) 0)
2263 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2266 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2268 /* Fast return for 0 and small values. We must do this for zero, since
2269 the code below can't handle that one case. */
2270 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2273 /* Get the number of trailing zeros. */
2274 lowbit = ffs((int) i) - 1;
2276 /* Only even shifts are allowed in ARM mode so round down to the
2277 nearest even number. */
2281 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2286 /* Allow rotated constants in ARM mode. */
2288 && ((i & ~0xc000003f) == 0
2289 || (i & ~0xf000000f) == 0
2290 || (i & ~0xfc000003) == 0))
2297 /* Allow repeated pattern. */
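      /* That is, on Thumb-2 a byte replicated into both halfwords
	 (e.g. 0x00120012) or into all four bytes (e.g. 0x12121212)
	 is a valid immediate.  */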
2300 if (i == v || i == (v | (v << 8)))
2307 /* Return true if I is a valid constant for the operation CODE. */
2309 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2311 if (const_ok_for_arm (i))
2335 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2337 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2343 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2347 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2354 /* Emit a sequence of insns to handle a large constant.
2355 CODE is the code of the operation required, it can be any of SET, PLUS,
2356 IOR, AND, XOR, MINUS;
2357 MODE is the mode in which the operation is being performed;
2358 VAL is the integer to operate on;
2359 SOURCE is the other operand (a register, or a null-pointer for SET);
2360 SUBTARGETS means it is safe to create scratch registers if that will
2361 either produce a simpler sequence, or we will want to cse the values.
2362 Return value is the number of insns emitted. */
2364 /* ??? Tweak this for thumb2. */
2366 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2367 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2371 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2372 cond = COND_EXEC_TEST (PATTERN (insn));
2376 if (subtargets || code == SET
2377 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2378 && REGNO (target) != REGNO (source)))
2380 /* After arm_reorg has been called, we can't fix up expensive
2381 constants by pushing them into memory, so we must synthesize
2382 them in-line, regardless of the cost. This is only likely to
2383 be more costly on chips that have load delay slots and we are
2384 compiling without running the scheduler (so no splitting
2385 occurred before the final instruction emission).
2387 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2389 if (!after_arm_reorg
2391 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2393 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2398 /* Currently SET is the only monadic value for CODE; all
2399 the rest are dyadic. */
2400 if (TARGET_USE_MOVT)
2401 arm_emit_movpair (target, GEN_INT (val));
2403 emit_set_insn (target, GEN_INT (val));
2409 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2411 if (TARGET_USE_MOVT)
2412 arm_emit_movpair (temp, GEN_INT (val));
2414 emit_set_insn (temp, GEN_INT (val));
2416 /* For MINUS, the value is subtracted from, since we never
2417 have subtraction of a constant. */
2419 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2421 emit_set_insn (target,
2422 gen_rtx_fmt_ee (code, mode, source, temp));
2428 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2432 /* Return the number of instructions required to synthesize the given
2433 constant, if we start emitting them from bit-position I. */
2435 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2437 HOST_WIDE_INT temp1;
2438 int step_size = TARGET_ARM ? 2 : 1;
2441 gcc_assert (TARGET_ARM || i == 0);
2449 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2454 temp1 = remainder & ((0x0ff << end)
2455 | ((i < end) ? (0xff >> (32 - end)) : 0));
2456 remainder &= ~temp1;
2461 } while (remainder);
2466 find_best_start (unsigned HOST_WIDE_INT remainder)
2468 int best_consecutive_zeros = 0;
2472 /* If we aren't targeting ARM, the best place to start is always at
2477 for (i = 0; i < 32; i += 2)
2479 int consecutive_zeros = 0;
2481 if (!(remainder & (3 << i)))
2483 while ((i < 32) && !(remainder & (3 << i)))
2485 consecutive_zeros += 2;
2488 if (consecutive_zeros > best_consecutive_zeros)
2490 best_consecutive_zeros = consecutive_zeros;
2491 best_start = i - consecutive_zeros;
2497 /* So long as it won't require any more insns to do so, it's
2498 desirable to emit a small constant (in bits 0...9) in the last
2499 insn. This way there is more chance that it can be combined with
2500 a later addressing insn to form a pre-indexed load or store
2501 operation. Consider:
2503 *((volatile int *)0xe0000100) = 1;
2504 *((volatile int *)0xe0000110) = 2;
2506 We want this to wind up as:
2510 str rB, [rA, #0x100]
2512 str rB, [rA, #0x110]
2514 rather than having to synthesize both large constants from scratch.
2516 Therefore, we calculate how many insns would be required to emit
2517 the constant starting from `best_start', and also starting from
2518 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2519 yield a shorter sequence, we may as well use zero. */
2521 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2522 && (count_insns_for_constant (remainder, 0) <=
2523 count_insns_for_constant (remainder, best_start)))
2529 /* Emit an instruction with the indicated PATTERN. If COND is
2530 non-NULL, conditionalize the execution of the instruction on COND
2534 emit_constant_insn (rtx cond, rtx pattern)
2537 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2538 emit_insn (pattern);
2541 /* As above, but extra parameter GENERATE which, if clear, suppresses
2543 /* ??? This needs more work for thumb2. */
2546 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2547 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2552 int final_invert = 0;
2553 int can_negate_initial = 0;
2555 int num_bits_set = 0;
2556 int set_sign_bit_copies = 0;
2557 int clear_sign_bit_copies = 0;
2558 int clear_zero_bit_copies = 0;
2559 int set_zero_bit_copies = 0;
2561 unsigned HOST_WIDE_INT temp1, temp2;
2562 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2563 int step_size = TARGET_ARM ? 2 : 1;
2565 /* Find out which operations are safe for a given CODE. Also do a quick
2566 check for degenerate cases; these can occur when DImode operations
2577 can_negate_initial = 1;
2581 if (remainder == 0xffffffff)
2584 emit_constant_insn (cond,
2585 gen_rtx_SET (VOIDmode, target,
2586 GEN_INT (ARM_SIGN_EXTEND (val))));
2592 if (reload_completed && rtx_equal_p (target, source))
2596 emit_constant_insn (cond,
2597 gen_rtx_SET (VOIDmode, target, source));
2609 emit_constant_insn (cond,
2610 gen_rtx_SET (VOIDmode, target, const0_rtx));
2613 if (remainder == 0xffffffff)
2615 if (reload_completed && rtx_equal_p (target, source))
2618 emit_constant_insn (cond,
2619 gen_rtx_SET (VOIDmode, target, source));
2628 if (reload_completed && rtx_equal_p (target, source))
2631 emit_constant_insn (cond,
2632 gen_rtx_SET (VOIDmode, target, source));
2636 if (remainder == 0xffffffff)
2639 emit_constant_insn (cond,
2640 gen_rtx_SET (VOIDmode, target,
2641 gen_rtx_NOT (mode, source)));
2647 /* We treat MINUS as (val - source), since (source - val) is always
2648 passed as (source + (-val)). */
2652 emit_constant_insn (cond,
2653 gen_rtx_SET (VOIDmode, target,
2654 gen_rtx_NEG (mode, source)));
2657 if (const_ok_for_arm (val))
2660 emit_constant_insn (cond,
2661 gen_rtx_SET (VOIDmode, target,
2662 gen_rtx_MINUS (mode, GEN_INT (val),
2674 /* If we can do it in one insn get out quickly. */
2675 if (const_ok_for_arm (val)
2676 || (can_negate_initial && const_ok_for_arm (-val))
2677 || (can_invert && const_ok_for_arm (~val)))
2680 emit_constant_insn (cond,
2681 gen_rtx_SET (VOIDmode, target,
2683 ? gen_rtx_fmt_ee (code, mode, source,
2689 /* Calculate a few attributes that may be useful for specific
2691 /* Count number of leading zeros. */
2692 for (i = 31; i >= 0; i--)
2694 if ((remainder & (1 << i)) == 0)
2695 clear_sign_bit_copies++;
2700 /* Count number of leading 1's. */
2701 for (i = 31; i >= 0; i--)
2703 if ((remainder & (1 << i)) != 0)
2704 set_sign_bit_copies++;
2709 /* Count number of trailing zeros. */
2710 for (i = 0; i <= 31; i++)
2712 if ((remainder & (1 << i)) == 0)
2713 clear_zero_bit_copies++;
2718 /* Count number of trailing 1's. */
2719 for (i = 0; i <= 31; i++)
2721 if ((remainder & (1 << i)) != 0)
2722 set_zero_bit_copies++;
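      /* E.g. remainder == 0xfff00000 gives set_sign_bit_copies == 12,
	 clear_zero_bit_copies == 20 and zero for the other two
	 counts.  */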
2730 /* See if we can use movw. */
2731 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2734 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2739 /* See if we can do this by sign_extending a constant that is known
2740 to be negative. This is a good way of doing it, since the shift
2741 may well merge into a subsequent insn. */
2742 if (set_sign_bit_copies > 1)
2744 if (const_ok_for_arm
2745 (temp1 = ARM_SIGN_EXTEND (remainder
2746 << (set_sign_bit_copies - 1))))
2750 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2751 emit_constant_insn (cond,
2752 gen_rtx_SET (VOIDmode, new_src,
2754 emit_constant_insn (cond,
2755 gen_ashrsi3 (target, new_src,
2756 GEN_INT (set_sign_bit_copies - 1)));
2760 /* For an inverted constant, we will need to set the low bits;
2761 these will be shifted out of harm's way. */
2762 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2763 if (const_ok_for_arm (~temp1))
2767 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2768 emit_constant_insn (cond,
2769 gen_rtx_SET (VOIDmode, new_src,
2771 emit_constant_insn (cond,
2772 gen_ashrsi3 (target, new_src,
2773 GEN_INT (set_sign_bit_copies - 1)));
2779 /* See if we can calculate the value as the difference between two
2780 valid immediates. */
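/* For instance, 0x000fffff is not itself a valid immediate, but the
   code below finds temp1 == 0x00100000 and temp2 == 1, so the value
   can be synthesized as 0x00100000 - 1 in two instructions.  */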
2781 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2783 int topshift = clear_sign_bit_copies & ~1;
2785 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2786 & (0xff000000 >> topshift));
2788 /* If temp1 is zero, then that means the 9 most significant
2789 bits of remainder were 1 and we've caused it to overflow.
2790 When topshift is 0 we don't need to do anything since we
2791 can borrow from 'bit 32'. */
2792 if (temp1 == 0 && topshift != 0)
2793 temp1 = 0x80000000 >> (topshift - 1);
2795 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2797 if (const_ok_for_arm (temp2))
2801 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2802 emit_constant_insn (cond,
2803 gen_rtx_SET (VOIDmode, new_src,
2805 emit_constant_insn (cond,
2806 gen_addsi3 (target, new_src,
2814 /* See if we can generate this by setting the bottom (or the top)
2815 16 bits, and then shifting these into the other half of the
2816 word. We only look for the simplest cases; to do more would cost
2817 too much. Be careful, however, not to generate this when the
2818 alternative would take fewer insns. */
2819 if (val & 0xffff0000)
2821 temp1 = remainder & 0xffff0000;
2822 temp2 = remainder & 0x0000ffff;
2824 /* Overlaps outside this range are best done using other methods. */
2825 for (i = 9; i < 24; i++)
2827 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2828 && !const_ok_for_arm (temp2))
2830 rtx new_src = (subtargets
2831 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2833 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2834 source, subtargets, generate);
2842 gen_rtx_ASHIFT (mode, source,
2849 /* Don't duplicate cases already considered. */
2850 for (i = 17; i < 24; i++)
2852 if (((temp1 | (temp1 >> i)) == remainder)
2853 && !const_ok_for_arm (temp1))
2855 rtx new_src = (subtargets
2856 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2858 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2859 source, subtargets, generate);
2864 gen_rtx_SET (VOIDmode, target,
2867 gen_rtx_LSHIFTRT (mode, source,
2878 /* If we have IOR or XOR, and the constant can be loaded in a
2879 single instruction, and we can find a temporary to put it in,
2880 then this can be done in two instructions instead of 3-4. */
2882 /* TARGET can't be NULL if SUBTARGETS is 0. */
2883 || (reload_completed && !reg_mentioned_p (target, source)))
2885 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2889 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2891 emit_constant_insn (cond,
2892 gen_rtx_SET (VOIDmode, sub,
2894 emit_constant_insn (cond,
2895 gen_rtx_SET (VOIDmode, target,
2896 gen_rtx_fmt_ee (code, mode,
2907 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2908 and the remainder 0s, e.g. 0xfff00000)
2909 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2911 This can be done in 2 instructions by using shifts with mov or mvn.
2916 mvn r0, r0, lsr #12 */
2917 if (set_sign_bit_copies > 8
2918 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2922 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2923 rtx shift = GEN_INT (set_sign_bit_copies);
2927 gen_rtx_SET (VOIDmode, sub,
2929 gen_rtx_ASHIFT (mode,
2934 gen_rtx_SET (VOIDmode, target,
2936 gen_rtx_LSHIFTRT (mode, sub,
2943 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2945 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2947 E.g. for r0 = r0 | 0xfff
2952 if (set_zero_bit_copies > 8
2953 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2957 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2958 rtx shift = GEN_INT (set_zero_bit_copies);
2962 gen_rtx_SET (VOIDmode, sub,
2964 gen_rtx_LSHIFTRT (mode,
2969 gen_rtx_SET (VOIDmode, target,
2971 gen_rtx_ASHIFT (mode, sub,
2977 /* This will never be reached for Thumb2 because orn is a valid
2978 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2980 x = y | constant (such that ~constant is a valid constant)
2982 x = ~(~y & ~constant).
2984 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2988 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2989 emit_constant_insn (cond,
2990 gen_rtx_SET (VOIDmode, sub,
2991 gen_rtx_NOT (mode, source)));
2994 sub = gen_reg_rtx (mode);
2995 emit_constant_insn (cond,
2996 gen_rtx_SET (VOIDmode, sub,
2997 gen_rtx_AND (mode, source,
2999 emit_constant_insn (cond,
3000 gen_rtx_SET (VOIDmode, target,
3001 gen_rtx_NOT (mode, sub)));
3008 /* See if two shifts will do 2 or more insns' worth of work. */
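  /* E.g. for code == AND with a constant of 0x0000ffff
     (clear_sign_bit_copies == 16), the pair
	mov	r0, r0, asl #16
	mov	r0, r0, lsr #16
     clears the top half without needing the constant at all.  */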
3009 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3011 HOST_WIDE_INT shift_mask = ((0xffffffff
3012 << (32 - clear_sign_bit_copies))
3015 if ((remainder | shift_mask) != 0xffffffff)
3019 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3020 insns = arm_gen_constant (AND, mode, cond,
3021 remainder | shift_mask,
3022 new_src, source, subtargets, 1);
3027 rtx targ = subtargets ? NULL_RTX : target;
3028 insns = arm_gen_constant (AND, mode, cond,
3029 remainder | shift_mask,
3030 targ, source, subtargets, 0);
3036 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3037 rtx shift = GEN_INT (clear_sign_bit_copies);
3039 emit_insn (gen_ashlsi3 (new_src, source, shift));
3040 emit_insn (gen_lshrsi3 (target, new_src, shift));
3046 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3048 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3050 if ((remainder | shift_mask) != 0xffffffff)
3054 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3056 insns = arm_gen_constant (AND, mode, cond,
3057 remainder | shift_mask,
3058 new_src, source, subtargets, 1);
3063 rtx targ = subtargets ? NULL_RTX : target;
3065 insns = arm_gen_constant (AND, mode, cond,
3066 remainder | shift_mask,
3067 targ, source, subtargets, 0);
3073 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3074 rtx shift = GEN_INT (clear_zero_bit_copies);
3076 emit_insn (gen_lshrsi3 (new_src, source, shift));
3077 emit_insn (gen_ashlsi3 (target, new_src, shift));
3089 for (i = 0; i < 32; i++)
3090 if (remainder & (1 << i))
3094 || (code != IOR && can_invert && num_bits_set > 16))
3095 remainder ^= 0xffffffff;
3096 else if (code == PLUS && num_bits_set > 16)
3097 remainder = (-remainder) & 0xffffffff;
3099 /* For XOR, if more than half the bits are set and there's a sequence
3100 of more than 8 consecutive ones in the pattern then we can XOR by the
3101 inverted constant and then invert the final result; this may save an
3102 instruction and might also lead to the final mvn being merged with
3103 some other operation. */
3104 else if (code == XOR && num_bits_set > 16
3105 && (count_insns_for_constant (remainder ^ 0xffffffff,
3107 (remainder ^ 0xffffffff))
3108 < count_insns_for_constant (remainder,
3109 find_best_start (remainder))))
3111 remainder ^= 0xffffffff;
3120 /* Now try to find a way of doing the job in either two or three
3122 We start by looking for the largest block of zeros that are aligned on
3123 a 2-bit boundary, we then fill up the temps, wrapping around to the
3124 top of the word when we drop off the bottom.
3125 In the worst case this code should produce no more than four insns.
3126 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3127 best place to start. */
3129 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3132 /* Now start emitting the insns. */
3133 i = find_best_start (remainder);
3140 if (remainder & (3 << (i - 2)))
3145 temp1 = remainder & ((0x0ff << end)
3146 | ((i < end) ? (0xff >> (32 - end)) : 0));
3147 remainder &= ~temp1;
3151 rtx new_src, temp1_rtx;
3153 if (code == SET || code == MINUS)
3155 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3156 if (can_invert && code != MINUS)
3161 if ((final_invert || remainder) && subtargets)
3162 new_src = gen_reg_rtx (mode);
3167 else if (can_negate)
3171 temp1 = trunc_int_for_mode (temp1, mode);
3172 temp1_rtx = GEN_INT (temp1);
3176 else if (code == MINUS)
3177 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3179 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3181 emit_constant_insn (cond,
3182 gen_rtx_SET (VOIDmode, new_src,
3192 else if (code == MINUS)
3198 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
3208 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3209 gen_rtx_NOT (mode, source)));
3216 /* Canonicalize a comparison so that we are more likely to recognize it.
3217 This can be done for a few constant compares, where we can make the
3218 immediate value easier to load. */
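/* For instance, (x > 0xfff) is rewritten as (x >= 0x1000): 0xfff is
   not a valid ARM immediate, but 0x1000 is, so the adjusted compare
   needs no constant-building insns.  */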
3221 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3223 enum machine_mode mode;
3224 unsigned HOST_WIDE_INT i, maxval;
3226 mode = GET_MODE (*op0);
3227 if (mode == VOIDmode)
3228 mode = GET_MODE (*op1);
3230 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3232 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3233 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3234 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3235 for GTU/LEU in Thumb mode. */
3240 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3242 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3245 if (code == GT || code == LE
3246 || (!TARGET_ARM && (code == GTU || code == LEU)))
3248 /* Missing comparison. First try to use an available
3250 if (GET_CODE (*op1) == CONST_INT)
3258 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3260 *op1 = GEN_INT (i + 1);
3261 return code == GT ? GE : LT;
3266 if (i != ~((unsigned HOST_WIDE_INT) 0)
3267 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3269 *op1 = GEN_INT (i + 1);
3270 return code == GTU ? GEU : LTU;
3278 /* If that did not work, reverse the condition. */
3282 return swap_condition (code);
3288 /* Comparisons smaller than DImode. Only adjust comparisons against
3289 an out-of-range constant. */
3290 if (GET_CODE (*op1) != CONST_INT
3291 || const_ok_for_arm (INTVAL (*op1))
3292 || const_ok_for_arm (- INTVAL (*op1)))
3306 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3308 *op1 = GEN_INT (i + 1);
3309 return code == GT ? GE : LT;
3316 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3318 *op1 = GEN_INT (i - 1);
3319 return code == GE ? GT : LE;
3325 if (i != ~((unsigned HOST_WIDE_INT) 0)
3326 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3328 *op1 = GEN_INT (i + 1);
3329 return code == GTU ? GEU : LTU;
3336 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3338 *op1 = GEN_INT (i - 1);
3339 return code == GEU ? GTU : LEU;
3351 /* Define how to find the value returned by a function. */
3354 arm_function_value (const_tree type, const_tree func,
3355 bool outgoing ATTRIBUTE_UNUSED)
3357 enum machine_mode mode;
3358 int unsignedp ATTRIBUTE_UNUSED;
3359 rtx r ATTRIBUTE_UNUSED;
3361 mode = TYPE_MODE (type);
3363 if (TARGET_AAPCS_BASED)
3364 return aapcs_allocate_return_reg (mode, type, func);
3366 /* Promote integer types. */
3367 if (INTEGRAL_TYPE_P (type))
3368 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3370 /* Promotes small structs returned in a register to full-word size
3371 for big-endian AAPCS. */
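  /* E.g. a 3-byte struct is widened here to a full 4-byte SImode
     value so that its contents sit at the most significant end of
     the return register.  */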
3372 if (arm_return_in_msb (type))
3374 HOST_WIDE_INT size = int_size_in_bytes (type);
3375 if (size % UNITS_PER_WORD != 0)
3377 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3378 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3382 return LIBCALL_VALUE (mode);
3386 libcall_eq (const void *p1, const void *p2)
3388 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3392 libcall_hash (const void *p1)
3394 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3398 add_libcall (htab_t htab, rtx libcall)
3400 *htab_find_slot (htab, libcall, INSERT) = libcall;
3404 arm_libcall_uses_aapcs_base (const_rtx libcall)
3406 static bool init_done = false;
3407 static htab_t libcall_htab;
3413 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3415 add_libcall (libcall_htab,
3416 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3417 add_libcall (libcall_htab,
3418 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3419 add_libcall (libcall_htab,
3420 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3421 add_libcall (libcall_htab,
3422 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3424 add_libcall (libcall_htab,
3425 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3426 add_libcall (libcall_htab,
3427 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3428 add_libcall (libcall_htab,
3429 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3430 add_libcall (libcall_htab,
3431 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3433 add_libcall (libcall_htab,
3434 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3435 add_libcall (libcall_htab,
3436 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3437 add_libcall (libcall_htab,
3438 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3439 add_libcall (libcall_htab,
3440 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3441 add_libcall (libcall_htab,
3442 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3443 add_libcall (libcall_htab,
3444 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3447 return libcall && htab_find (libcall_htab, libcall) != NULL;
3451 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3453 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3454 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3456 /* The following libcalls return their result in integer registers,
3457 even though they return a floating point value. */
3458 if (arm_libcall_uses_aapcs_base (libcall))
3459 return gen_rtx_REG (mode, ARG_REGISTER(1));
3463 return LIBCALL_VALUE (mode);
3466 /* Determine the amount of memory needed to store the possible return
3467 registers of an untyped call. */
3469 arm_apply_result_size (void)
3475 if (TARGET_HARD_FLOAT_ABI)
3481 if (TARGET_MAVERICK)
3484 if (TARGET_IWMMXT_ABI)
3491 /* Decide whether TYPE should be returned in memory (true)
3492 or in a register (false). FNTYPE is the type of the function making
3495 arm_return_in_memory (const_tree type, const_tree fntype)
3499 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3501 if (TARGET_AAPCS_BASED)
3503 /* Simple, non-aggregate types (i.e. not including vectors and
3504 complex) are always returned in a register (or registers).
3505 We don't care about which register here, so we can short-cut
3506 some of the detail. */
3507 if (!AGGREGATE_TYPE_P (type)
3508 && TREE_CODE (type) != VECTOR_TYPE
3509 && TREE_CODE (type) != COMPLEX_TYPE)
3512 /* Any return value that is no larger than one word can be
3514 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3517 /* Check any available co-processors to see if they accept the
3518 type as a register candidate (VFP, for example, can return
3519 some aggregates in consecutive registers). These aren't
3520 available if the call is variadic. */
3521 if (aapcs_select_return_coproc (type, fntype) >= 0)
3524 /* Vector values should be returned using ARM registers, not
3525 memory (unless they're over 16 bytes, which will break since
3526 we only have four call-clobbered registers to play with). */
3527 if (TREE_CODE (type) == VECTOR_TYPE)
3528 return (size < 0 || size > (4 * UNITS_PER_WORD));
3530 /* The rest go in memory. */
3534 if (TREE_CODE (type) == VECTOR_TYPE)
3535 return (size < 0 || size > (4 * UNITS_PER_WORD));
3537 if (!AGGREGATE_TYPE_P (type)
3538 && (TREE_CODE (type) != VECTOR_TYPE))
3539 /* All simple types are returned in registers. */
3542 if (arm_abi != ARM_ABI_APCS)
3544 /* ATPCS and later return aggregate types in memory only if they are
3545 larger than a word (or are variable size). */
3546 return (size < 0 || size > UNITS_PER_WORD);
3549 /* For the arm-wince targets we choose to be compatible with Microsoft's
3550 ARM and Thumb compilers, which always return aggregates in memory. */
3552 /* All structures/unions bigger than one word are returned in memory.
3553 Also catch the case where int_size_in_bytes returns -1. In this case
3554 the aggregate is either huge or of variable size, and in either case
3555 we will want to return it via memory and not in a register. */
3556 if (size < 0 || size > UNITS_PER_WORD)
3559 if (TREE_CODE (type) == RECORD_TYPE)
3563 /* For a struct the APCS says that we only return in a register
3564 if the type is 'integer like' and every addressable element
3565 has an offset of zero. For practical purposes this means
3566 that the structure can have at most one non bit-field element
3567 and that this element must be the first one in the structure. */
3569 /* Find the first field, ignoring non FIELD_DECL things which will
3570 have been created by C++. */
3571 for (field = TYPE_FIELDS (type);
3572 field && TREE_CODE (field) != FIELD_DECL;
3573 field = DECL_CHAIN (field))
3577 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3579 /* Check that the first field is valid for returning in a register. */
3581 /* ... Floats are not allowed */
3582 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3585 /* ... Aggregates that are not themselves valid for returning in
3586 a register are not allowed. */
3587 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3590 /* Now check the remaining fields, if any. Only bitfields are allowed,
3591 since they are not addressable. */
3592 for (field = DECL_CHAIN (field);
3594 field = DECL_CHAIN (field))
3596 if (TREE_CODE (field) != FIELD_DECL)
3599 if (!DECL_BIT_FIELD_TYPE (field))
3606 if (TREE_CODE (type) == UNION_TYPE)
3610 /* Unions can be returned in registers if every element is
3611 integral, or can be returned in an integer register. */
3612 for (field = TYPE_FIELDS (type);
3614 field = DECL_CHAIN (field))
3616 if (TREE_CODE (field) != FIELD_DECL)
3619 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3622 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3628 #endif /* not ARM_WINCE */
3630 /* Return all other types in memory. */
3634 /* Indicate whether or not words of a double are in big-endian order. */
3637 arm_float_words_big_endian (void)
3639 if (TARGET_MAVERICK)
3642 /* For FPA, float words are always big-endian. For VFP, float words
3643 follow the memory system mode. */
3651 return (TARGET_BIG_END ? 1 : 0);
3656 const struct pcs_attribute_arg
3660 } pcs_attribute_args[] =
3662 {"aapcs", ARM_PCS_AAPCS},
3663 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3665 /* We could recognize these, but changes would be needed elsewhere
3666 to implement them. */
3667 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3668 {"atpcs", ARM_PCS_ATPCS},
3669 {"apcs", ARM_PCS_APCS},
3671 {NULL, ARM_PCS_UNKNOWN}
3675 arm_pcs_from_attribute (tree attr)
3677 const struct pcs_attribute_arg *ptr;
3680 /* Get the value of the argument. */
3681 if (TREE_VALUE (attr) == NULL_TREE
3682 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3683 return ARM_PCS_UNKNOWN;
3685 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3687 /* Check it against the list of known arguments. */
3688 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3689 if (streq (arg, ptr->arg))
3692 /* An unrecognized PCS variant. */
3693 return ARM_PCS_UNKNOWN;
3696 /* Get the PCS variant to use for this call. TYPE is the function's type
3697 specification; DECL is the specific declaration. DECL may be null if
3698 the call could be indirect or if this is a library call. */
3700 arm_get_pcs_model (const_tree type, const_tree decl)
3702 bool user_convention = false;
3703 enum arm_pcs user_pcs = arm_pcs_default;
3708 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3711 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3712 user_convention = true;
3715 if (TARGET_AAPCS_BASED)
3717 /* Detect varargs functions. These always use the base rules
3718 (no argument is ever a candidate for a co-processor
3720 bool base_rules = stdarg_p (type);
3722 if (user_convention)
3724 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3725 sorry ("Non-AAPCS derived PCS variant");
3726 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3727 error ("Variadic functions must use the base AAPCS variant");
3731 return ARM_PCS_AAPCS;
3732 else if (user_convention)
3734 else if (decl && flag_unit_at_a_time)
3736 /* Local functions never leak outside this compilation unit,
3737 so we are free to use whatever conventions are
3739 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3740 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3742 return ARM_PCS_AAPCS_LOCAL;
3745 else if (user_convention && user_pcs != arm_pcs_default)
3746 sorry ("PCS variant");
3748 /* For everything else we use the target's default. */
3749 return arm_pcs_default;
3754 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3755 const_tree fntype ATTRIBUTE_UNUSED,
3756 rtx libcall ATTRIBUTE_UNUSED,
3757 const_tree fndecl ATTRIBUTE_UNUSED)
3759 /* Record the unallocated VFP registers. */
3760 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3761 pcum->aapcs_vfp_reg_alloc = 0;
3764 /* Walk down the type tree of TYPE counting consecutive base elements.
3765 If *MODEP is VOIDmode, then set it to the first valid floating point
3766 type. If a non-floating point type is found, or if a floating point
3767 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3768 otherwise return the count in the sub-tree. */
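/* E.g. for struct { double x; double y; } this returns 2 with *MODEP
   set to DFmode: a homogeneous aggregate that the VFP PCS can pass
   in two consecutive double-precision registers.  */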
3770 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3772 enum machine_mode mode;
3775 switch (TREE_CODE (type))
3778 mode = TYPE_MODE (type);
3779 if (mode != DFmode && mode != SFmode)
3782 if (*modep == VOIDmode)
3791 mode = TYPE_MODE (TREE_TYPE (type));
3792 if (mode != DFmode && mode != SFmode)
3795 if (*modep == VOIDmode)
3804 /* Use V2SImode and V4SImode as representatives of all 64-bit
3805 and 128-bit vector types, whether or not those modes are
3806 supported with the present options. */
3807 size = int_size_in_bytes (type);
3820 if (*modep == VOIDmode)
3823 /* Vector modes are considered to be opaque: two vectors are
3824 equivalent for the purposes of being homogeneous aggregates
3825 if they are the same size. */
3834 tree index = TYPE_DOMAIN (type);
3836 /* Can't handle incomplete types. */
3837 if (!COMPLETE_TYPE_P(type))
3840 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3843 || !TYPE_MAX_VALUE (index)
3844 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3845 || !TYPE_MIN_VALUE (index)
3846 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3850 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3851 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3853 /* There must be no padding. */
3854 if (!host_integerp (TYPE_SIZE (type), 1)
3855 || (tree_low_cst (TYPE_SIZE (type), 1)
3856 != count * GET_MODE_BITSIZE (*modep)))
3868 /* Can't handle incomplete types. */
3869 if (!COMPLETE_TYPE_P(type))
3872 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3874 if (TREE_CODE (field) != FIELD_DECL)
3877 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3883 /* There must be no padding. */
3884 if (!host_integerp (TYPE_SIZE (type), 1)
3885 || (tree_low_cst (TYPE_SIZE (type), 1)
3886 != count * GET_MODE_BITSIZE (*modep)))
3893 case QUAL_UNION_TYPE:
3895 /* These aren't very interesting except in a degenerate case. */
3900 /* Can't handle incomplete types. */
3901 if (!COMPLETE_TYPE_P(type))
3904 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3906 if (TREE_CODE (field) != FIELD_DECL)
3909 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3912 count = count > sub_count ? count : sub_count;
3915 /* There must be no padding. */
3916 if (!host_integerp (TYPE_SIZE (type), 1)
3917 || (tree_low_cst (TYPE_SIZE (type), 1)
3918 != count * GET_MODE_BITSIZE (*modep)))
3931 /* Return true if PCS_VARIANT should use VFP registers. */
3933 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3935 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3937 static bool seen_thumb1_vfp = false;
3939 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3941 sorry ("Thumb-1 hard-float VFP ABI");
3942 /* sorry() is not immediately fatal, so only display this once. */
3943 seen_thumb1_vfp = true;
3949 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3952 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
3953 && (TARGET_VFP_DOUBLE || !is_double));
3957 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3958 enum machine_mode mode, const_tree type,
3959 enum machine_mode *base_mode, int *count)
3961 enum machine_mode new_mode = VOIDmode;
3963 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3964 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3965 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3970 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3973 new_mode = (mode == DCmode ? DFmode : SFmode);
3975 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3977 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3979 if (ag_count > 0 && ag_count <= 4)
3988 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3991 *base_mode = new_mode;
3996 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3997 enum machine_mode mode, const_tree type)
3999 int count ATTRIBUTE_UNUSED;
4000 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4002 if (!use_vfp_abi (pcs_variant, false))
4004 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4009 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4012 if (!use_vfp_abi (pcum->pcs_variant, false))
4015 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4016 &pcum->aapcs_vfp_rmode,
4017 &pcum->aapcs_vfp_rcount);
4021 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4022 const_tree type ATTRIBUTE_UNUSED)
4024 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4025 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4028 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4029 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4031 pcum->aapcs_vfp_reg_alloc = mask << regno;
4032 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4035 int rcount = pcum->aapcs_vfp_rcount;
4037 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4041 /* Avoid using unsupported vector modes. */
4042 if (rmode == V2SImode)
4044 else if (rmode == V4SImode)
4051 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4052 for (i = 0; i < rcount; i++)
4054 rtx tmp = gen_rtx_REG (rmode,
4055 FIRST_VFP_REGNUM + regno + i * rshift);
4056 tmp = gen_rtx_EXPR_LIST
4058 GEN_INT (i * GET_MODE_SIZE (rmode)));
4059 XVECEXP (par, 0, i) = tmp;
4062 pcum->aapcs_reg = par;
4065 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4072 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4073 enum machine_mode mode,
4074 const_tree type ATTRIBUTE_UNUSED)
4076 if (!use_vfp_abi (pcs_variant, false))
4079 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4082 enum machine_mode ag_mode;
4087 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4092 if (ag_mode == V2SImode)
4094 else if (ag_mode == V4SImode)
4100 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4101 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4102 for (i = 0; i < count; i++)
4104 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4105 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4106 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4107 XVECEXP (par, 0, i) = tmp;
4113 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4117 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4118 enum machine_mode mode ATTRIBUTE_UNUSED,
4119 const_tree type ATTRIBUTE_UNUSED)
4121 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4122 pcum->aapcs_vfp_reg_alloc = 0;
4126 #define AAPCS_CP(X) \
4128 aapcs_ ## X ## _cum_init, \
4129 aapcs_ ## X ## _is_call_candidate, \
4130 aapcs_ ## X ## _allocate, \
4131 aapcs_ ## X ## _is_return_candidate, \
4132 aapcs_ ## X ## _allocate_return_reg, \
4133 aapcs_ ## X ## _advance \
4136 /* Table of co-processors that can be used to pass arguments in
4137 registers. Ideally no argument should be a candidate for more than
4138 one co-processor table entry, but the table is processed in order
4139 and stops after the first match. If that entry then fails to put
4140 the argument into a co-processor register, the argument will go on
4144 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4145 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4147 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4148 BLKmode) is a candidate for this co-processor's registers; this
4149 function should ignore any position-dependent state in
4150 CUMULATIVE_ARGS and only use call-type dependent information. */
4151 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4153 /* Return true if the argument does get a co-processor register; it
4154 should set aapcs_reg to an RTX for the register allocated, in the
4155 form required for a return from FUNCTION_ARG. */
4156 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4158 /* Return true if a result of mode MODE (or type TYPE if MODE is
4159 BLKmode) can be returned in this co-processor's registers. */
4160 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4162 /* Allocate and return an RTX element to hold the return type of a
4163 call; this routine must not fail and will only be called if
4164 is_return_candidate returned true with the same parameters. */
4165 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4167 /* Finish processing this argument and prepare to start processing
4169 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4170 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4178 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4183 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4184 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4191 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4193 /* We aren't passed a decl, so we can't check that a call is local.
4194 However, it isn't clear that that would be a win anyway, since it
4195 might limit some tail-calling opportunities. */
4196 enum arm_pcs pcs_variant;
4200 const_tree fndecl = NULL_TREE;
4202 if (TREE_CODE (fntype) == FUNCTION_DECL)
4205 fntype = TREE_TYPE (fntype);
4208 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4211 pcs_variant = arm_pcs_default;
4213 if (pcs_variant != ARM_PCS_AAPCS)
4217 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4218 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4227 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4230 /* We aren't passed a decl, so we can't check that a call is local.
4231 However, it isn't clear that that would be a win anyway, since it
4232 might limit some tail-calling opportunities. */
4233 enum arm_pcs pcs_variant;
4234 int unsignedp ATTRIBUTE_UNUSED;
4238 const_tree fndecl = NULL_TREE;
4240 if (TREE_CODE (fntype) == FUNCTION_DECL)
4243 fntype = TREE_TYPE (fntype);
4246 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4249 pcs_variant = arm_pcs_default;
4251 /* Promote integer types. */
4252 if (type && INTEGRAL_TYPE_P (type))
4253 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4255 if (pcs_variant != ARM_PCS_AAPCS)
4259 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4260 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4262 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4266 /* Promotes small structs returned in a register to full-word size
4267 for big-endian AAPCS. */
4268 if (type && arm_return_in_msb (type))
4270 HOST_WIDE_INT size = int_size_in_bytes (type);
4271 if (size % UNITS_PER_WORD != 0)
4273 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4274 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4278 return gen_rtx_REG (mode, R0_REGNUM);
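/* For illustration: on a big-endian AAPCS target, a 6-byte structure
   result is widened here to an 8-byte integer mode (DImode), so the
   value comes back in r0-r1 with its contents in the most significant
   end of the register pair, as arm_return_in_msb requires.  */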
4282 aapcs_libcall_value (enum machine_mode mode)
4284 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4287 /* Lay out a function argument using the AAPCS rules. The rule
4288 numbers referred to here are those in the AAPCS. */
4290 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4291 tree type, int named)
4296 /* We only need to do this once per argument. */
4297 if (pcum->aapcs_arg_processed)
4300 pcum->aapcs_arg_processed = true;
4302 /* Special case: if named is false then we are handling an incoming
4303 anonymous argument which is on the stack. */
4307 /* Is this a potential co-processor register candidate? */
4308 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4310 int slot = aapcs_select_call_coproc (pcum, mode, type);
4311 pcum->aapcs_cprc_slot = slot;
4313 /* We don't have to apply any of the rules from part B of the
4314 preparation phase; these are handled elsewhere in the compiler. */
4319 /* A Co-processor register candidate goes either in its own
4320 class of registers or on the stack. */
4321 if (!pcum->aapcs_cprc_failed[slot])
4323 /* C1.cp - Try to allocate the argument to co-processor registers. */
4325 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4328 /* C2.cp - Put the argument on the stack and note that we
4329 can't assign any more candidates in this slot. We also
4330 need to note that we have allocated stack space, so that
4331 we won't later try to split a non-cprc candidate between
4332 core registers and the stack. */
4333 pcum->aapcs_cprc_failed[slot] = true;
4334 pcum->can_split = false;
4337 /* We didn't get a register, so this argument goes on the stack. */
4339 gcc_assert (pcum->can_split == false);
4344 /* C3 - For double-word aligned arguments, round the NCRN up to the
4345 next even number. */
4346 ncrn = pcum->aapcs_ncrn;
4347 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4350 nregs = ARM_NUM_REGS2(mode, type);
4352 /* Sigh, this test should really assert that nregs > 0, but a GCC
4353 extension allows empty structs and then gives them empty size; it
4354 then allows such a structure to be passed by value. For some of
4355 the code below we have to pretend that such an argument has
4356 non-zero size so that we 'locate' it correctly either in
4357 registers or on the stack. */
4358 gcc_assert (nregs >= 0);
4360 nregs2 = nregs ? nregs : 1;
4362 /* C4 - Argument fits entirely in core registers. */
4363 if (ncrn + nregs2 <= NUM_ARG_REGS)
4365 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4366 pcum->aapcs_next_ncrn = ncrn + nregs;
4370 /* C5 - Some core registers left and there are no arguments already
4371 on the stack: split this argument between the remaining core
4372 registers and the stack. */
4373 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4375 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4376 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4377 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4381 /* C6 - NCRN is set to 4. */
4382 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4384 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
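/* Worked example (illustrative; base AAPCS variant, where doubles use
   core registers): for a call f (int a, double b, int c):

     a:  C4 allocates r0                         (ncrn 0 -> 1)
     b:  C3 rounds ncrn up to 2, C4 gives r2-r3  (ncrn 2 -> 4)
     c:  C4 and C5 fail since ncrn == 4, so C6-C8 put it on the
         stack.  */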
4388 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4389 for a call to a function whose data type is FNTYPE.
4390 For a library call, FNTYPE is NULL. */
4392 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4394 tree fndecl ATTRIBUTE_UNUSED)
4396 /* Long call handling. */
4398 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4400 pcum->pcs_variant = arm_pcs_default;
4402 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4404 if (arm_libcall_uses_aapcs_base (libname))
4405 pcum->pcs_variant = ARM_PCS_AAPCS;
4407 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4408 pcum->aapcs_reg = NULL_RTX;
4409 pcum->aapcs_partial = 0;
4410 pcum->aapcs_arg_processed = false;
4411 pcum->aapcs_cprc_slot = -1;
4412 pcum->can_split = true;
4414 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4418 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4420 pcum->aapcs_cprc_failed[i] = false;
4421 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4429 /* On the ARM, the offset starts at 0. */
4431 pcum->iwmmxt_nregs = 0;
4432 pcum->can_split = true;
4434 /* Varargs vectors are treated the same as long long.
4435 named_count avoids having to change the way arm handles 'named'. */
4436 pcum->named_count = 0;
4439 if (TARGET_REALLY_IWMMXT && fntype)
4443 for (fn_arg = TYPE_ARG_TYPES (fntype);
4445 fn_arg = TREE_CHAIN (fn_arg))
4446 pcum->named_count += 1;
4448 if (! pcum->named_count)
4449 pcum->named_count = INT_MAX;
4454 /* Return true if mode/type need doubleword alignment. */
4456 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4458 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4459 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
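/* E.g. on AAPCS targets DImode and DFmode carry 64-bit alignment,
   which exceeds the 32-bit PARM_BOUNDARY, so they answer true here
   and (via rule C3 above) start in an even-numbered core register.  */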
4463 /* Determine where to put an argument to a function.
4464 Value is zero to push the argument on the stack,
4465 or a hard register in which to store the argument.
4467 MODE is the argument's machine mode.
4468 TYPE is the data type of the argument (as a tree).
4469 This is null for libcalls where that information may not be available.
4471 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4472 the preceding args and about the function being called.
4473 NAMED is nonzero if this argument is a named parameter
4474 (otherwise it is an extra parameter matching an ellipsis). */
4477 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4478 tree type, int named)
4482 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4483 a call insn (op3 of a call_value insn). */
4484 if (mode == VOIDmode)
4487 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4489 aapcs_layout_arg (pcum, mode, type, named);
4490 return pcum->aapcs_reg;
4493 /* Varargs vectors are treated the same as long long.
4494 named_count avoids having to change the way arm handles 'named'. */
4495 if (TARGET_IWMMXT_ABI
4496 && arm_vector_mode_supported_p (mode)
4497 && pcum->named_count > pcum->nargs + 1)
4499 if (pcum->iwmmxt_nregs <= 9)
4500 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4503 pcum->can_split = false;
4508 /* Put doubleword aligned quantities in even register pairs. */
4509 if (pcum->nregs & 1
4510 && ARM_DOUBLEWORD_ALIGN
4511 && arm_needs_doubleword_align (mode, type))
4512 pcum->nregs++;
4514 /* Only allow splitting an arg between regs and memory if all preceding
4515 args were allocated to regs. For args passed by reference we only count
4516 the reference pointer. */
4517 if (pcum->can_split)
4518 nregs = 1;
4519 else
4520 nregs = ARM_NUM_REGS2 (mode, type);
4522 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4523 return NULL_RTX;
4525 return gen_rtx_REG (mode, pcum->nregs);
4529 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4530 tree type, bool named)
4532 int nregs = pcum->nregs;
4534 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4536 aapcs_layout_arg (pcum, mode, type, named);
4537 return pcum->aapcs_partial;
4540 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4543 if (NUM_ARG_REGS > nregs
4544 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4545 && pcum->can_split)
4546 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
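/* Worked example (illustrative, non-AAPCS path): with r0-r2 already
   used (nregs == 3), a DImode argument needs two registers, so
   NUM_ARG_REGS (4) is exceeded and the result is (4 - 3) * 4 = 4
   bytes: the first word travels in r3, the second on the stack.  */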
4552 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4553 tree type, bool named)
4555 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4557 aapcs_layout_arg (pcum, mode, type, named);
4559 if (pcum->aapcs_cprc_slot >= 0)
4561 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4563 pcum->aapcs_cprc_slot = -1;
4566 /* Generic stuff. */
4567 pcum->aapcs_arg_processed = false;
4568 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4569 pcum->aapcs_reg = NULL_RTX;
4570 pcum->aapcs_partial = 0;
4575 if (arm_vector_mode_supported_p (mode)
4576 && pcum->named_count > pcum->nargs
4577 && TARGET_IWMMXT_ABI)
4578 pcum->iwmmxt_nregs += 1;
4580 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4584 /* Variable sized types are passed by reference. This is a GCC
4585 extension to the ARM ABI. */
4588 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4589 enum machine_mode mode ATTRIBUTE_UNUSED,
4590 const_tree type, bool named ATTRIBUTE_UNUSED)
4592 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
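/* I.e. only types whose size is not a compile-time constant (for
   instance dynamically sized objects from front ends that support
   them) are passed as a pointer to a caller-made copy; everything
   else on ARM is passed by value.  */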
4595 /* Encode the current state of the #pragma [no_]long_calls. */
4596 typedef enum
4597 {
4598 OFF,	/* No #pragma [no_]long_calls is in effect. */
4599 LONG,	/* #pragma long_calls is in effect. */
4600 SHORT	/* #pragma no_long_calls is in effect. */
4601 } arm_pragma_enum;
4603 static arm_pragma_enum arm_pragma_long_calls = OFF;
4606 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4608 arm_pragma_long_calls = LONG;
4612 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4614 arm_pragma_long_calls = SHORT;
4618 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4620 arm_pragma_long_calls = OFF;
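/* Illustrative user-level usage of the pragmas handled above:

     #pragma long_calls
     extern void far_away (void);   -- calls use a long-call sequence
     #pragma no_long_calls
     extern void near_by (void);    -- calls use a plain BL
     #pragma long_calls_off         -- revert to the command-line default
*/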
4623 /* Handle an attribute requiring a FUNCTION_DECL;
4624 arguments as in struct attribute_spec.handler. */
4626 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4627 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4629 if (TREE_CODE (*node) != FUNCTION_DECL)
4631 warning (OPT_Wattributes, "%qE attribute only applies to functions", name);
4633 *no_add_attrs = true;
4639 /* Handle an "interrupt" or "isr" attribute;
4640 arguments as in struct attribute_spec.handler. */
4642 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4647 if (TREE_CODE (*node) != FUNCTION_DECL)
4649 warning (OPT_Wattributes, "%qE attribute only applies to functions", name);
4651 *no_add_attrs = true;
4653 /* FIXME: the argument if any is checked for type attributes;
4654 should it be checked for decl ones? */
4658 if (TREE_CODE (*node) == FUNCTION_TYPE
4659 || TREE_CODE (*node) == METHOD_TYPE)
4661 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4663 warning (OPT_Wattributes, "%qE attribute ignored", name);
4665 *no_add_attrs = true;
4668 else if (TREE_CODE (*node) == POINTER_TYPE
4669 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4670 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4671 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4673 *node = build_variant_type_copy (*node);
4674 TREE_TYPE (*node) = build_type_attribute_variant
4675 (TREE_TYPE (*node),
4676 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4677 *no_add_attrs = true;
4681 /* Possibly pass this attribute on from the type to a decl. */
4682 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4683 | (int) ATTR_FLAG_FUNCTION_NEXT
4684 | (int) ATTR_FLAG_ARRAY_NEXT))
4686 *no_add_attrs = true;
4687 return tree_cons (name, args, NULL_TREE);
4691 warning (OPT_Wattributes, "%qE attribute ignored", name);
4700 /* Handle a "pcs" attribute; arguments as in struct
4701 attribute_spec.handler. */
4703 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4704 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4706 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4708 warning (OPT_Wattributes, "%qE attribute ignored", name);
4709 *no_add_attrs = true;
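/* Illustrative usage of the attribute handled above:

     double f (double) __attribute__((pcs("aapcs")));     -- base variant
     double g (double) __attribute__((pcs("aapcs-vfp"))); -- VFP variant

   Any other string argument lands in the warning above.  */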
4714 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4715 /* Handle the "notshared" attribute. This attribute is another way of
4716 requesting hidden visibility. ARM's compiler supports
4717 "__declspec(notshared)"; we support the same thing via an
4721 arm_handle_notshared_attribute (tree *node,
4722 tree name ATTRIBUTE_UNUSED,
4723 tree args ATTRIBUTE_UNUSED,
4724 int flags ATTRIBUTE_UNUSED,
4727 tree decl = TYPE_NAME (*node);
4731 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4732 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4733 *no_add_attrs = false;
4739 /* Return 0 if the attributes for two types are incompatible, 1 if they
4740 are compatible, and 2 if they are nearly compatible (which causes a
4741 warning to be generated). */
4743 arm_comp_type_attributes (const_tree type1, const_tree type2)
4747 /* Check for mismatch of non-default calling convention. */
4748 if (TREE_CODE (type1) != FUNCTION_TYPE)
4751 /* Check for mismatched call attributes. */
4752 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4753 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4754 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4755 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4757 /* Only bother to check if an attribute is defined. */
4758 if (l1 | l2 | s1 | s2)
4760 /* If one type has an attribute, the other must have the same attribute. */
4761 if ((l1 != l2) || (s1 != s2))
4764 /* Disallow mixed attributes. */
4765 if ((l1 & s2) || (l2 & s1))
4769 /* Check for mismatched ISR attribute. */
4770 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4771 if (! l1)
4772 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4773 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4774 if (! l2)
4775 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4782 /* Assigns default attributes to newly defined type. This is used to
4783 set short_call/long_call attributes for function types of
4784 functions defined inside corresponding #pragma scopes. */
4786 arm_set_default_type_attributes (tree type)
4788 /* Add __attribute__ ((long_call)) to all functions, when
4789 inside #pragma long_calls or __attribute__ ((short_call)),
4790 when inside #pragma no_long_calls. */
4791 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4793 tree type_attr_list, attr_name;
4794 type_attr_list = TYPE_ATTRIBUTES (type);
4796 if (arm_pragma_long_calls == LONG)
4797 attr_name = get_identifier ("long_call");
4798 else if (arm_pragma_long_calls == SHORT)
4799 attr_name = get_identifier ("short_call");
4803 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4804 TYPE_ATTRIBUTES (type) = type_attr_list;
4808 /* Return true if DECL is known to be linked into section SECTION. */
4811 arm_function_in_section_p (tree decl, section *section)
4813 /* We can only be certain about functions defined in the same
4814 compilation unit. */
4815 if (!TREE_STATIC (decl))
4818 /* Make sure that SYMBOL always binds to the definition in this
4819 compilation unit. */
4820 if (!targetm.binds_local_p (decl))
4823 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4824 if (!DECL_SECTION_NAME (decl))
4826 /* Make sure that we will not create a unique section for DECL. */
4827 if (flag_function_sections || DECL_ONE_ONLY (decl))
4831 return function_section (decl) == section;
4834 /* Return nonzero if a 32-bit "long_call" should be generated for
4835 a call from the current function to DECL. We generate a long_call
4838 a. has an __attribute__((long_call))
4839 or b. is within the scope of a #pragma long_calls
4840 or c. the -mlong-calls command line switch has been specified
4842 However we do not generate a long call if the function:
4844 d. has an __attribute__ ((short_call))
4845 or e. is inside the scope of a #pragma no_long_calls
4846 or f. is defined in the same section as the current function. */
4849 arm_is_long_call_p (tree decl)
4851 tree attrs;
4853 if (!decl)
4854 return TARGET_LONG_CALLS;
4856 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4857 if (lookup_attribute ("short_call", attrs))
4860 /* For "f", be conservative, and only cater for cases in which the
4861 whole of the current function is placed in the same section. */
4862 if (!flag_reorder_blocks_and_partition
4863 && TREE_CODE (decl) == FUNCTION_DECL
4864 && arm_function_in_section_p (decl, current_function_section ()))
4867 if (lookup_attribute ("long_call", attrs))
4870 return TARGET_LONG_CALLS;
4873 /* Return nonzero if it is ok to make a tail-call to DECL. */
4875 arm_function_ok_for_sibcall (tree decl, tree exp)
4877 unsigned long func_type;
4879 if (cfun->machine->sibcall_blocked)
4882 /* Never tailcall something for which we have no decl, or if we
4883 are generating code for Thumb-1. */
4884 if (decl == NULL || TARGET_THUMB1)
4887 /* The PIC register is live on entry to VxWorks PLT entries, so we
4888 must make the call before restoring the PIC register. */
4889 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4892 /* Cannot tail-call to long calls, since these are out of range of
4893 a branch instruction. */
4894 if (arm_is_long_call_p (decl))
4897 /* If we are interworking and the function is not declared static
4898 then we can't tail-call it unless we know that it exists in this
4899 compilation unit (since it might be a Thumb routine). */
4900 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4903 func_type = arm_current_func_type ();
4904 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4905 if (IS_INTERRUPT (func_type))
4908 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4910 /* Check that the return value locations are the same. For
4911 example that we aren't returning a value from the sibling in
4912 a VFP register but then need to transfer it to a core
4913 register. */
4914 rtx a, b;
4916 a = arm_function_value (TREE_TYPE (exp), decl, false);
4917 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4919 if (!rtx_equal_p (a, b))
4923 /* Never tailcall if function may be called with a misaligned SP. */
4924 if (IS_STACKALIGN (func_type))
4927 /* Everything else is ok. */
4932 /* Addressing mode support functions. */
4934 /* Return nonzero if X is a legitimate immediate operand when compiling
4935 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4937 legitimate_pic_operand_p (rtx x)
4939 if (GET_CODE (x) == SYMBOL_REF
4940 || (GET_CODE (x) == CONST
4941 && GET_CODE (XEXP (x, 0)) == PLUS
4942 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4948 /* Record that the current function needs a PIC register. Initialize
4949 cfun->machine->pic_reg if we have not already done so. */
4952 require_pic_register (void)
4954 /* A lot of the logic here is made obscure by the fact that this
4955 routine gets called as part of the rtx cost estimation process.
4956 We don't want those calls to affect any assumptions about the real
4957 function; and further, we can't call entry_of_function() until we
4958 start the real expansion process. */
4959 if (!crtl->uses_pic_offset_table)
4961 gcc_assert (can_create_pseudo_p ());
4962 if (arm_pic_register != INVALID_REGNUM)
4964 if (!cfun->machine->pic_reg)
4965 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4967 /* Play games to avoid marking the function as needing pic
4968 if we are being called as part of the cost-estimation process. */
4970 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4971 crtl->uses_pic_offset_table = 1;
4977 if (!cfun->machine->pic_reg)
4978 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4980 /* Play games to avoid marking the function as needing pic
4981 if we are being called as part of the cost-estimation process. */
4983 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4985 crtl->uses_pic_offset_table = 1;
4988 arm_load_pic_register (0UL);
4992 /* We can be called during expansion of PHI nodes, where
4993 we can't yet emit instructions directly in the final
4994 insn stream. Queue the insns on the entry edge, they will
4995 be committed after everything else is expanded. */
4996 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5003 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5005 if (GET_CODE (orig) == SYMBOL_REF
5006 || GET_CODE (orig) == LABEL_REF)
5012 gcc_assert (can_create_pseudo_p ());
5013 reg = gen_reg_rtx (Pmode);
5016 /* VxWorks does not impose a fixed gap between segments; the run-time
5017 gap can be different from the object-file gap. We therefore can't
5018 use GOTOFF unless we are absolutely sure that the symbol is in the
5019 same segment as the GOT. Unfortunately, the flexibility of linker
5020 scripts means that we can't be sure of that in general, so assume
5021 that GOTOFF is never valid on VxWorks. */
5022 if ((GET_CODE (orig) == LABEL_REF
5023 || (GET_CODE (orig) == SYMBOL_REF &&
5024 SYMBOL_REF_LOCAL_P (orig)))
5026 && !TARGET_VXWORKS_RTP)
5027 insn = arm_pic_static_addr (orig, reg);
5033 /* If this function doesn't have a pic register, create one now. */
5034 require_pic_register ();
5036 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5038 /* Make the MEM as close to a constant as possible. */
5039 mem = SET_SRC (pat);
5040 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5041 MEM_READONLY_P (mem) = 1;
5042 MEM_NOTRAP_P (mem) = 1;
5044 insn = emit_insn (pat);
5047 /* Put a REG_EQUAL note on this insn, so that it can be optimized by loop. */
5049 set_unique_reg_note (insn, REG_EQUAL, orig);
5053 else if (GET_CODE (orig) == CONST)
5057 if (GET_CODE (XEXP (orig, 0)) == PLUS
5058 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5061 /* Handle the case where we have: const (UNSPEC_TLS). */
5062 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5063 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5066 /* Handle the case where we have:
5067 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a CONST_INT. */
5069 if (GET_CODE (XEXP (orig, 0)) == PLUS
5070 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5071 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5073 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5079 gcc_assert (can_create_pseudo_p ());
5080 reg = gen_reg_rtx (Pmode);
5083 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5085 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5086 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5087 base == reg ? 0 : reg);
5089 if (GET_CODE (offset) == CONST_INT)
5091 /* The base register doesn't really matter, we only want to
5092 test the index for the appropriate mode. */
5093 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5095 gcc_assert (can_create_pseudo_p ());
5096 offset = force_reg (Pmode, offset);
5099 if (GET_CODE (offset) == CONST_INT)
5100 return plus_constant (base, INTVAL (offset));
5103 if (GET_MODE_SIZE (mode) > 4
5104 && (GET_MODE_CLASS (mode) == MODE_INT
5105 || TARGET_SOFT_FLOAT))
5107 emit_insn (gen_addsi3 (reg, base, offset));
5111 return gen_rtx_PLUS (Pmode, base, offset);
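/* Schematically, a GOT-indirect access built above amounts to

       ldr   rX, [rPIC, #offset-of-sym's-GOT-slot]

   via calculate_pic_address, while local symbols and labels take the
   cheaper pc-relative arm_pic_static_addr form instead.  */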
5118 /* Find a spare register to use during the prolog of a function. */
5121 thumb_find_work_register (unsigned long pushed_regs_mask)
5125 /* Check the argument registers first as these are call-used. The
5126 register allocation order means that sometimes r3 might be used
5127 but earlier argument registers might not, so check them all. */
5128 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5129 if (!df_regs_ever_live_p (reg))
5132 /* Before going on to check the call-saved registers we can try a couple
5133 more ways of deducing that r3 is available. The first is when we are
5134 pushing anonymous arguments onto the stack and we have less than 4
5135 registers worth of fixed arguments(*). In this case r3 will be part of
5136 the variable argument list and so we can be sure that it will be
5137 pushed right at the start of the function. Hence it will be available
5138 for the rest of the prologue.
5139 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5140 if (cfun->machine->uses_anonymous_args
5141 && crtl->args.pretend_args_size > 0)
5142 return LAST_ARG_REGNUM;
5144 /* The other case is when we have fixed arguments but less than 4 registers
5145 worth. In this case r3 might be used in the body of the function, but
5146 it is not being used to convey an argument into the function. In theory
5147 we could just check crtl->args.size to see how many bytes are
5148 being passed in argument registers, but it seems that it is unreliable.
5149 Sometimes it will have the value 0 when in fact arguments are being
5150 passed. (See testcase execute/20021111-1.c for an example). So we also
5151 check the args_info.nregs field as well. The problem with this field is
5152 that it makes no allowances for arguments that are passed to the
5153 function but which are not used. Hence we could miss an opportunity
5154 when a function has an unused argument in r3. But it is better to be
5155 safe than to be sorry. */
5156 if (! cfun->machine->uses_anonymous_args
5157 && crtl->args.size >= 0
5158 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5159 && crtl->args.info.nregs < 4)
5160 return LAST_ARG_REGNUM;
5162 /* Otherwise look for a call-saved register that is going to be pushed. */
5163 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5164 if (pushed_regs_mask & (1 << reg))
5169 /* Thumb-2 can use high regs. */
5170 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5171 if (pushed_regs_mask & (1 << reg))
5174 /* Something went wrong - thumb_compute_save_reg_mask()
5175 should have arranged for a suitable register to be pushed. */
5179 static GTY(()) int pic_labelno;
5181 /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. */
5185 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5187 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5189 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5192 gcc_assert (flag_pic);
5194 pic_reg = cfun->machine->pic_reg;
5195 if (TARGET_VXWORKS_RTP)
5197 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5198 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5199 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5201 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5203 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5204 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5208 /* We use an UNSPEC rather than a LABEL_REF because this label
5209 never appears in the code stream. */
5211 labelno = GEN_INT (pic_labelno++);
5212 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5213 l1 = gen_rtx_CONST (VOIDmode, l1);
5215 /* On the ARM the PC register contains 'dot + 8' at the time of the
5216 addition, on the Thumb it is 'dot + 4'. */
5217 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5218 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5220 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5224 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5226 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5228 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5230 else /* TARGET_THUMB1 */
5232 if (arm_pic_register != INVALID_REGNUM
5233 && REGNO (pic_reg) > LAST_LO_REGNUM)
5235 /* We will have pushed the pic register, so we should always be
5236 able to find a work register. */
5237 pic_tmp = gen_rtx_REG (SImode,
5238 thumb_find_work_register (saved_regs));
5239 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5240 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5243 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5244 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5248 /* Need to emit this whether or not we obey regdecls,
5249 since setjmp/longjmp can cause life info to screw up. */
5250 emit_use (pic_reg);
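/* Schematically, the ARM-mode sequence emitted above is:

       ldr     rPIC, .LPICoff
   .LPIC0:
       add     rPIC, pc, rPIC
       ...
   .LPICoff:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   (label names illustrative); the +8 matches the pc bias noted
   above: 'dot + 8' in ARM state, 'dot + 4' in Thumb state.  */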
5253 /* Generate code to load the address of a static var when flag_pic is set. */
5255 arm_pic_static_addr (rtx orig, rtx reg)
5257 rtx l1, labelno, offset_rtx, insn;
5259 gcc_assert (flag_pic);
5261 /* We use an UNSPEC rather than a LABEL_REF because this label
5262 never appears in the code stream. */
5263 labelno = GEN_INT (pic_labelno++);
5264 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5265 l1 = gen_rtx_CONST (VOIDmode, l1);
5267 /* On the ARM the PC register contains 'dot + 8' at the time of the
5268 addition, on the Thumb it is 'dot + 4'. */
5269 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5270 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5271 UNSPEC_SYMBOL_OFFSET);
5272 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5276 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5278 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5280 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5282 else /* TARGET_THUMB1 */
5284 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5285 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5291 /* Return nonzero if X is valid as an ARM state addressing register. */
5293 arm_address_register_rtx_p (rtx x, int strict_p)
5297 if (GET_CODE (x) != REG)
5303 return ARM_REGNO_OK_FOR_BASE_P (regno);
5305 return (regno <= LAST_ARM_REGNUM
5306 || regno >= FIRST_PSEUDO_REGISTER
5307 || regno == FRAME_POINTER_REGNUM
5308 || regno == ARG_POINTER_REGNUM);
5311 /* Return TRUE if this rtx is the difference of a symbol and a label,
5312 and will reduce to a PC-relative relocation in the object file.
5313 Expressions like this can be left alone when generating PIC, rather
5314 than forced through the GOT. */
5316 pcrel_constant_p (rtx x)
5318 if (GET_CODE (x) == MINUS)
5319 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5324 /* Return true if X will surely end up in an index register after next splitting pass. */
5327 will_be_in_index_register (const_rtx x)
5329 /* arm.md: calculate_pic_address will split this into a register. */
5330 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5333 /* Return nonzero if X is a valid ARM state address operand. */
5335 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5339 enum rtx_code code = GET_CODE (x);
5341 if (arm_address_register_rtx_p (x, strict_p))
5344 use_ldrd = (TARGET_LDRD
5346 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5348 if (code == POST_INC || code == PRE_DEC
5349 || ((code == PRE_INC || code == POST_DEC)
5350 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5351 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5353 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5354 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5355 && GET_CODE (XEXP (x, 1)) == PLUS
5356 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5358 rtx addend = XEXP (XEXP (x, 1), 1);
5360 /* Don't allow ldrd post increment by register because it's hard
5361 to fixup invalid register choices. */
5363 && GET_CODE (x) == POST_MODIFY
5364 && GET_CODE (addend) == REG)
5367 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5368 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5371 /* After reload constants split into minipools will have addresses
5372 from a LABEL_REF. */
5373 else if (reload_completed
5374 && (code == LABEL_REF
5376 && GET_CODE (XEXP (x, 0)) == PLUS
5377 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5378 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5381 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5384 else if (code == PLUS)
5386 rtx xop0 = XEXP (x, 0);
5387 rtx xop1 = XEXP (x, 1);
5389 return ((arm_address_register_rtx_p (xop0, strict_p)
5390 && ((GET_CODE(xop1) == CONST_INT
5391 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5392 || (!strict_p && will_be_in_index_register (xop1))))
5393 || (arm_address_register_rtx_p (xop1, strict_p)
5394 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5398 /* Reload currently can't handle MINUS, so disable this for now */
5399 else if (GET_CODE (x) == MINUS)
5401 rtx xop0 = XEXP (x, 0);
5402 rtx xop1 = XEXP (x, 1);
5404 return (arm_address_register_rtx_p (xop0, strict_p)
5405 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5409 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5410 && code == SYMBOL_REF
5411 && CONSTANT_POOL_ADDRESS_P (x)
5413 && symbol_mentioned_p (get_pool_constant (x))
5414 && ! pcrel_constant_p (get_pool_constant (x))))
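/* In summary, the ARM-state address forms accepted above are
   (schematically):

     [Rn]                      register
     [Rn, #+/-imm]             immediate offset, range set by the mode
     [Rn, +/-Rm]               register offset
     [Rn, +/-Rm, <shift> #i]   scaled register offset
     [Rn, #imm]! / [Rn], #imm  pre/post indexed forms

   plus minipool (LABEL_REF-based) references after reload.  */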
5420 /* Return nonzero if X is a valid Thumb-2 address operand. */
5422 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5425 enum rtx_code code = GET_CODE (x);
5427 if (arm_address_register_rtx_p (x, strict_p))
5430 use_ldrd = (TARGET_LDRD
5432 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5434 if (code == POST_INC || code == PRE_DEC
5435 || ((code == PRE_INC || code == POST_DEC)
5436 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5437 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5439 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5440 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5441 && GET_CODE (XEXP (x, 1)) == PLUS
5442 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5444 /* Thumb-2 only has autoincrement by constant. */
5445 rtx addend = XEXP (XEXP (x, 1), 1);
5446 HOST_WIDE_INT offset;
5448 if (GET_CODE (addend) != CONST_INT)
5451 offset = INTVAL(addend);
5452 if (GET_MODE_SIZE (mode) <= 4)
5453 return (offset > -256 && offset < 256);
5455 return (use_ldrd && offset > -1024 && offset < 1024
5456 && (offset & 3) == 0);
5459 /* After reload constants split into minipools will have addresses
5460 from a LABEL_REF. */
5461 else if (reload_completed
5462 && (code == LABEL_REF
5464 && GET_CODE (XEXP (x, 0)) == PLUS
5465 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5466 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5469 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5472 else if (code == PLUS)
5474 rtx xop0 = XEXP (x, 0);
5475 rtx xop1 = XEXP (x, 1);
5477 return ((arm_address_register_rtx_p (xop0, strict_p)
5478 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5479 || (!strict_p && will_be_in_index_register (xop1))))
5480 || (arm_address_register_rtx_p (xop1, strict_p)
5481 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5484 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5485 && code == SYMBOL_REF
5486 && CONSTANT_POOL_ADDRESS_P (x)
5488 && symbol_mentioned_p (get_pool_constant (x))
5489 && ! pcrel_constant_p (get_pool_constant (x))))
5495 /* Return nonzero if INDEX is valid for an address index operand in
5498 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5501 HOST_WIDE_INT range;
5502 enum rtx_code code = GET_CODE (index);
5504 /* Standard coprocessor addressing modes. */
5505 if (TARGET_HARD_FLOAT
5506 && (TARGET_FPA || TARGET_MAVERICK)
5507 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5508 || (TARGET_MAVERICK && mode == DImode)))
5509 return (code == CONST_INT && INTVAL (index) < 1024
5510 && INTVAL (index) > -1024
5511 && (INTVAL (index) & 3) == 0);
5514 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5515 return (code == CONST_INT
5516 && INTVAL (index) < 1016
5517 && INTVAL (index) > -1024
5518 && (INTVAL (index) & 3) == 0);
5520 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5521 return (code == CONST_INT
5522 && INTVAL (index) < 1024
5523 && INTVAL (index) > -1024
5524 && (INTVAL (index) & 3) == 0);
5526 if (arm_address_register_rtx_p (index, strict_p)
5527 && (GET_MODE_SIZE (mode) <= 4))
5530 if (mode == DImode || mode == DFmode)
5532 if (code == CONST_INT)
5534 HOST_WIDE_INT val = INTVAL (index);
5536 if (TARGET_LDRD)
5537 return val > -256 && val < 256;
5538 else
5539 return val > -4096 && val < 4092;
5542 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5545 if (GET_MODE_SIZE (mode) <= 4
5549 || (mode == QImode && outer == SIGN_EXTEND))))
5553 rtx xiop0 = XEXP (index, 0);
5554 rtx xiop1 = XEXP (index, 1);
5556 return ((arm_address_register_rtx_p (xiop0, strict_p)
5557 && power_of_two_operand (xiop1, SImode))
5558 || (arm_address_register_rtx_p (xiop1, strict_p)
5559 && power_of_two_operand (xiop0, SImode)));
5561 else if (code == LSHIFTRT || code == ASHIFTRT
5562 || code == ASHIFT || code == ROTATERT)
5564 rtx op = XEXP (index, 1);
5566 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5567 && GET_CODE (op) == CONST_INT
5569 && INTVAL (op) <= 31);
5573 /* For ARM v4 we may be doing a sign-extend operation during the
5579 || (outer == SIGN_EXTEND && mode == QImode))
5585 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5587 return (code == CONST_INT
5588 && INTVAL (index) < range
5589 && INTVAL (index) > -range);
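/* For example (illustrative, SImode): the range check above accepts
   ldr r0, [r1, #4095] and ldr r0, [r1, #-4095], while an offset of
   4096 is rejected and must first be added into the base register.  */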
5592 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5593 index operand, i.e. 1, 2, 4 or 8. */
5595 thumb2_index_mul_operand (rtx op)
5599 if (GET_CODE(op) != CONST_INT)
5603 return (val == 1 || val == 2 || val == 4 || val == 8);
5606 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5608 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5610 enum rtx_code code = GET_CODE (index);
5612 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5613 /* Standard coprocessor addressing modes. */
5614 if (TARGET_HARD_FLOAT
5615 && (TARGET_FPA || TARGET_MAVERICK)
5616 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5617 || (TARGET_MAVERICK && mode == DImode)))
5618 return (code == CONST_INT && INTVAL (index) < 1024
5619 && INTVAL (index) > -1024
5620 && (INTVAL (index) & 3) == 0);
5622 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5624 /* For DImode assume values will usually live in core regs
5625 and only allow LDRD addressing modes. */
5626 if (!TARGET_LDRD || mode != DImode)
5627 return (code == CONST_INT
5628 && INTVAL (index) < 1024
5629 && INTVAL (index) > -1024
5630 && (INTVAL (index) & 3) == 0);
5634 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5635 return (code == CONST_INT
5636 && INTVAL (index) < 1016
5637 && INTVAL (index) > -1024
5638 && (INTVAL (index) & 3) == 0);
5640 if (arm_address_register_rtx_p (index, strict_p)
5641 && (GET_MODE_SIZE (mode) <= 4))
5644 if (mode == DImode || mode == DFmode)
5646 if (code == CONST_INT)
5648 HOST_WIDE_INT val = INTVAL (index);
5649 /* ??? Can we assume ldrd for thumb2? */
5650 /* Thumb-2 ldrd only has reg+const addressing modes. */
5651 /* ldrd supports offsets of +-1020.
5652 However the ldr fallback does not. */
5653 return val > -256 && val < 256 && (val & 3) == 0;
5661 rtx xiop0 = XEXP (index, 0);
5662 rtx xiop1 = XEXP (index, 1);
5664 return ((arm_address_register_rtx_p (xiop0, strict_p)
5665 && thumb2_index_mul_operand (xiop1))
5666 || (arm_address_register_rtx_p (xiop1, strict_p)
5667 && thumb2_index_mul_operand (xiop0)));
5669 else if (code == ASHIFT)
5671 rtx op = XEXP (index, 1);
5673 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5674 && GET_CODE (op) == CONST_INT
5676 && INTVAL (op) <= 3);
5679 return (code == CONST_INT
5680 && INTVAL (index) < 4096
5681 && INTVAL (index) > -256);
5684 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5686 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5690 if (GET_CODE (x) != REG)
5696 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5698 return (regno <= LAST_LO_REGNUM
5699 || regno > LAST_VIRTUAL_REGISTER
5700 || regno == FRAME_POINTER_REGNUM
5701 || (GET_MODE_SIZE (mode) >= 4
5702 && (regno == STACK_POINTER_REGNUM
5703 || regno >= FIRST_PSEUDO_REGISTER
5704 || x == hard_frame_pointer_rtx
5705 || x == arg_pointer_rtx)));
5708 /* Return nonzero if x is a legitimate index register. This is the case
5709 for any base register that can access a QImode object. */
5711 thumb1_index_register_rtx_p (rtx x, int strict_p)
5713 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5716 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5718 The AP may be eliminated to either the SP or the FP, so we use the
5719 least common denominator, e.g. SImode, and offsets from 0 to 64.
5721 ??? Verify whether the above is the right approach.
5723 ??? Also, the FP may be eliminated to the SP, so perhaps that
5724 needs special handling also.
5726 ??? Look at how the mips16 port solves this problem. It probably uses
5727 better ways to solve some of these problems.
5729 Although it is not incorrect, we don't accept QImode and HImode
5730 addresses based on the frame pointer or arg pointer until the
5731 reload pass starts. This is so that eliminating such addresses
5732 into stack based ones won't produce impossible code. */
5734 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5736 /* ??? Not clear if this is right. Experiment. */
5737 if (GET_MODE_SIZE (mode) < 4
5738 && !(reload_in_progress || reload_completed)
5739 && (reg_mentioned_p (frame_pointer_rtx, x)
5740 || reg_mentioned_p (arg_pointer_rtx, x)
5741 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5742 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5743 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5744 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5747 /* Accept any base register. SP only in SImode or larger. */
5748 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5751 /* This is PC relative data before arm_reorg runs. */
5752 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5753 && GET_CODE (x) == SYMBOL_REF
5754 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5757 /* This is PC relative data after arm_reorg runs. */
5758 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5760 && (GET_CODE (x) == LABEL_REF
5761 || (GET_CODE (x) == CONST
5762 && GET_CODE (XEXP (x, 0)) == PLUS
5763 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5764 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5767 /* Post-inc indexing only supported for SImode and larger. */
5768 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5769 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5772 else if (GET_CODE (x) == PLUS)
5774 /* REG+REG address can be any two index registers. */
5775 /* We disallow FRAME+REG addressing since we know that FRAME
5776 will be replaced with STACK, and SP relative addressing only
5777 permits SP+OFFSET. */
5778 if (GET_MODE_SIZE (mode) <= 4
5779 && XEXP (x, 0) != frame_pointer_rtx
5780 && XEXP (x, 1) != frame_pointer_rtx
5781 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5782 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5783 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5786 /* REG+const has 5-7 bit offset for non-SP registers. */
5787 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5788 || XEXP (x, 0) == arg_pointer_rtx)
5789 && GET_CODE (XEXP (x, 1)) == CONST_INT
5790 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5793 /* REG+const has 10-bit offset for SP, but only SImode and
5794 larger is supported. */
5795 /* ??? Should probably check for DI/DFmode overflow here
5796 just like GO_IF_LEGITIMATE_OFFSET does. */
5797 else if (GET_CODE (XEXP (x, 0)) == REG
5798 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5799 && GET_MODE_SIZE (mode) >= 4
5800 && GET_CODE (XEXP (x, 1)) == CONST_INT
5801 && INTVAL (XEXP (x, 1)) >= 0
5802 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5803 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5806 else if (GET_CODE (XEXP (x, 0)) == REG
5807 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5808 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5809 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5810 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5811 && GET_MODE_SIZE (mode) >= 4
5812 && GET_CODE (XEXP (x, 1)) == CONST_INT
5813 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5817 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5818 && GET_MODE_SIZE (mode) == 4
5819 && GET_CODE (x) == SYMBOL_REF
5820 && CONSTANT_POOL_ADDRESS_P (x)
5822 && symbol_mentioned_p (get_pool_constant (x))
5823 && ! pcrel_constant_p (get_pool_constant (x))))
5829 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5830 instruction of mode MODE. */
5832 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5834 switch (GET_MODE_SIZE (mode))
5835 {
5836 case 1:
5837 return val >= 0 && val < 32;
5839 case 2:
5840 return val >= 0 && val < 64 && (val & 1) == 0;
5842 default:
5843 return (val >= 0
5844 && (val + GET_MODE_SIZE (mode)) <= 128
5845 && (val & 3) == 0);
5846 }
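/* I.e. the Thumb-1 immediate offset ranges are:

     byte access:      0 .. 31
     halfword access:  0 .. 62, even
     word or larger:   0 .. (128 - size), multiple of four  */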
5850 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5853 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5854 else if (TARGET_THUMB2)
5855 return thumb2_legitimate_address_p (mode, x, strict_p);
5856 else /* if (TARGET_THUMB1) */
5857 return thumb1_legitimate_address_p (mode, x, strict_p);
5860 /* Build the SYMBOL_REF for __tls_get_addr. */
5862 static GTY(()) rtx tls_get_addr_libfunc;
5865 get_tls_get_addr (void)
5867 if (!tls_get_addr_libfunc)
5868 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5869 return tls_get_addr_libfunc;
5873 arm_load_tp (rtx target)
5876 target = gen_reg_rtx (SImode);
5880 /* Can return in any reg. */
5881 emit_insn (gen_load_tp_hard (target));
5885 /* Always returned in r0. Immediately copy the result into a pseudo,
5886 otherwise other uses of r0 (e.g. setting up function arguments) may
5887 clobber the value. */
5891 emit_insn (gen_load_tp_soft ());
5893 tmp = gen_rtx_REG (SImode, 0);
5894 emit_move_insn (target, tmp);
5900 load_tls_operand (rtx x, rtx reg)
5904 if (reg == NULL_RTX)
5905 reg = gen_reg_rtx (SImode);
5907 tmp = gen_rtx_CONST (SImode, x);
5909 emit_move_insn (reg, tmp);
5915 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5917 rtx insns, label, labelno, sum;
5921 labelno = GEN_INT (pic_labelno++);
5922 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5923 label = gen_rtx_CONST (VOIDmode, label);
5925 sum = gen_rtx_UNSPEC (Pmode,
5926 gen_rtvec (4, x, GEN_INT (reloc), label,
5927 GEN_INT (TARGET_ARM ? 8 : 4)),
5929 reg = load_tls_operand (sum, reg);
5932 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5933 else if (TARGET_THUMB2)
5934 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5935 else /* TARGET_THUMB1 */
5936 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5938 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5939 Pmode, 1, reg, Pmode);
5941 insns = get_insns ();
5948 legitimize_tls_address (rtx x, rtx reg)
5950 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5951 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5955 case TLS_MODEL_GLOBAL_DYNAMIC:
5956 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5957 dest = gen_reg_rtx (Pmode);
5958 emit_libcall_block (insns, dest, ret, x);
5961 case TLS_MODEL_LOCAL_DYNAMIC:
5962 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5964 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5965 share the LDM result with other LD model accesses. */
5966 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5968 dest = gen_reg_rtx (Pmode);
5969 emit_libcall_block (insns, dest, ret, eqv);
5971 /* Load the addend. */
5972 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5974 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5975 return gen_rtx_PLUS (Pmode, dest, addend);
5977 case TLS_MODEL_INITIAL_EXEC:
5978 labelno = GEN_INT (pic_labelno++);
5979 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5980 label = gen_rtx_CONST (VOIDmode, label);
5981 sum = gen_rtx_UNSPEC (Pmode,
5982 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5983 GEN_INT (TARGET_ARM ? 8 : 4)),
5985 reg = load_tls_operand (sum, reg);
5988 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5989 else if (TARGET_THUMB2)
5990 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5993 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5994 emit_move_insn (reg, gen_const_mem (SImode, reg));
5997 tp = arm_load_tp (NULL_RTX);
5999 return gen_rtx_PLUS (Pmode, tp, reg);
6001 case TLS_MODEL_LOCAL_EXEC:
6002 tp = arm_load_tp (NULL_RTX);
6004 reg = gen_rtx_UNSPEC (Pmode,
6005 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6007 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6009 return gen_rtx_PLUS (Pmode, tp, reg);
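/* Schematic summary of the sequences built above:

     global dynamic:  addr = __tls_get_addr (GD32 reloc for x)
     local dynamic:   base = __tls_get_addr (LDM32 reloc);
                      addr = base + LDO32 offset of x
     initial exec:    addr = TP + [GOT-indirect IE32 offset of x]
     local exec:      addr = TP + LE32 offset of x

   TP being the thread pointer fetched by arm_load_tp.  */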
6016 /* Try machine-dependent ways of modifying an illegitimate address
6017 to be legitimate. If we find one, return the new, valid address. */
6019 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6023 /* TODO: legitimize_address for Thumb2. */
6026 return thumb_legitimize_address (x, orig_x, mode);
6029 if (arm_tls_symbol_p (x))
6030 return legitimize_tls_address (x, NULL_RTX);
6032 if (GET_CODE (x) == PLUS)
6034 rtx xop0 = XEXP (x, 0);
6035 rtx xop1 = XEXP (x, 1);
6037 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6038 xop0 = force_reg (SImode, xop0);
6040 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6041 xop1 = force_reg (SImode, xop1);
6043 if (ARM_BASE_REGISTER_RTX_P (xop0)
6044 && GET_CODE (xop1) == CONST_INT)
6046 HOST_WIDE_INT n, low_n;
6050 /* VFP addressing modes actually allow greater offsets, but for
6051 now we just stick with the lowest common denominator. */
6053 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6065 low_n = ((mode) == TImode ? 0
6066 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6070 base_reg = gen_reg_rtx (SImode);
6071 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6072 emit_move_insn (base_reg, val);
6073 x = plus_constant (base_reg, low_n);
6075 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6076 x = gen_rtx_PLUS (SImode, xop0, xop1);
6079 /* XXX We don't allow MINUS any more -- see comment in
6080 arm_legitimate_address_outer_p (). */
6081 else if (GET_CODE (x) == MINUS)
6083 rtx xop0 = XEXP (x, 0);
6084 rtx xop1 = XEXP (x, 1);
6086 if (CONSTANT_P (xop0))
6087 xop0 = force_reg (SImode, xop0);
6089 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6090 xop1 = force_reg (SImode, xop1);
6092 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6093 x = gen_rtx_MINUS (SImode, xop0, xop1);
6096 /* Make sure to take full advantage of the pre-indexed addressing mode
6097 with absolute addresses, which often allows the base register to
6098 be factored out across multiple adjacent memory references and
6099 might even allow the minipool to be avoided entirely. */
6100 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6103 HOST_WIDE_INT mask, base, index;
6106 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6107 use an 8-bit index. So let's use a 12-bit index for SImode only and
6108 hope that arm_gen_constant will enable ldrb to use more bits. */
6109 bits = (mode == SImode) ? 12 : 8;
6110 mask = (1 << bits) - 1;
6111 base = INTVAL (x) & ~mask;
6112 index = INTVAL (x) & mask;
6113 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6115 /* It'll most probably be more efficient to generate the base
6116 with more bits set and use a negative index instead. */
6120 base_reg = force_reg (SImode, GEN_INT (base));
6121 x = plus_constant (base_reg, index);
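/* Worked example (illustrative, SImode): x = 0x00123456 gives
   bits = 12, mask = 0xfff, base = 0x00123000 and index = 0x456;
   the base is forced into a register once and the access becomes
   [rBASE, #0x456], so neighbouring absolute addresses can share
   the same base register.  */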
6126 /* We need to find and carefully transform any SYMBOL and LABEL
6127 references; so go back to the original address expression. */
6128 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6130 if (new_x != orig_x)
6138 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6139 to be legitimate. If we find one, return the new, valid address. */
6141 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6143 if (arm_tls_symbol_p (x))
6144 return legitimize_tls_address (x, NULL_RTX);
6146 if (GET_CODE (x) == PLUS
6147 && GET_CODE (XEXP (x, 1)) == CONST_INT
6148 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6149 || INTVAL (XEXP (x, 1)) < 0))
6151 rtx xop0 = XEXP (x, 0);
6152 rtx xop1 = XEXP (x, 1);
6153 HOST_WIDE_INT offset = INTVAL (xop1);
6155 /* Try and fold the offset into a biasing of the base register and
6156 then offsetting that. Don't do this when optimizing for space
6157 since it can cause too many CSEs. */
6158 if (optimize_size && offset >= 0
6159 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6161 HOST_WIDE_INT delta;
6164 delta = offset - (256 - GET_MODE_SIZE (mode));
6165 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6166 delta = 31 * GET_MODE_SIZE (mode);
6168 delta = offset & (~31 * GET_MODE_SIZE (mode));
6170 xop0 = force_operand (plus_constant (xop0, offset - delta),
6172 x = plus_constant (xop0, delta);
6174 else if (offset < 0 && offset > -256)
6175 /* Small negative offsets are best done with a subtract before the
6176 dereference; forcing these into a register normally takes two
6177 instructions. */
6178 x = force_operand (x, NULL_RTX);
6181 /* For the remaining cases, force the constant into a register. */
6182 xop1 = force_reg (SImode, xop1);
6183 x = gen_rtx_PLUS (SImode, xop0, xop1);
6186 else if (GET_CODE (x) == PLUS
6187 && s_register_operand (XEXP (x, 1), SImode)
6188 && !s_register_operand (XEXP (x, 0), SImode))
6190 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6192 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6197 /* We need to find and carefully transform any SYMBOL and LABEL
6198 references; so go back to the original address expression. */
6199 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6201 if (new_x != orig_x)
6209 thumb_legitimize_reload_address (rtx *x_p,
6210 enum machine_mode mode,
6211 int opnum, int type,
6212 int ind_levels ATTRIBUTE_UNUSED)
6216 if (GET_CODE (x) == PLUS
6217 && GET_MODE_SIZE (mode) < 4
6218 && REG_P (XEXP (x, 0))
6219 && XEXP (x, 0) == stack_pointer_rtx
6220 && GET_CODE (XEXP (x, 1)) == CONST_INT
6221 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6226 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6227 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6231 /* If both registers are hi-regs, then it's better to reload the
6232 entire expression rather than each register individually. That
6233 only requires one reload register rather than two. */
6234 if (GET_CODE (x) == PLUS
6235 && REG_P (XEXP (x, 0))
6236 && REG_P (XEXP (x, 1))
6237 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6238 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6243 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6244 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6251 /* Test for various thread-local symbols. */
6253 /* Return TRUE if X is a thread-local symbol. */
6256 arm_tls_symbol_p (rtx x)
6258 if (! TARGET_HAVE_TLS)
6261 if (GET_CODE (x) != SYMBOL_REF)
6264 return SYMBOL_REF_TLS_MODEL (x) != 0;
6267 /* Helper for arm_tls_referenced_p. */
6270 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6272 if (GET_CODE (*x) == SYMBOL_REF)
6273 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6275 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6276 TLS offsets, not real symbol references. */
6277 if (GET_CODE (*x) == UNSPEC
6278 && XINT (*x, 1) == UNSPEC_TLS)
6284 /* Return TRUE if X contains any TLS symbol references. */
6287 arm_tls_referenced_p (rtx x)
6289 if (! TARGET_HAVE_TLS)
6292 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6295 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6298 arm_cannot_force_const_mem (rtx x)
6302 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6304 split_const (x, &base, &offset);
6305 if (GET_CODE (base) == SYMBOL_REF
6306 && !offset_within_block_p (base, INTVAL (offset)))
6309 return arm_tls_referenced_p (x);
6312 #define REG_OR_SUBREG_REG(X) \
6313 (GET_CODE (X) == REG \
6314 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6316 #define REG_OR_SUBREG_RTX(X) \
6317 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6320 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6322 enum machine_mode mode = GET_MODE (x);
6336 return COSTS_N_INSNS (1);
6339 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6342 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6349 return COSTS_N_INSNS (2) + cycles;
6351 return COSTS_N_INSNS (1) + 16;
6354 return (COSTS_N_INSNS (1)
6355 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6356 + GET_CODE (SET_DEST (x)) == MEM));
6361 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6363 if (thumb_shiftable_const (INTVAL (x)))
6364 return COSTS_N_INSNS (2);
6365 return COSTS_N_INSNS (3);
6367 else if ((outer == PLUS || outer == COMPARE)
6368 && INTVAL (x) < 256 && INTVAL (x) > -256)
6370 else if ((outer == IOR || outer == XOR || outer == AND)
6371 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6372 return COSTS_N_INSNS (1);
6373 else if (outer == AND)
6376 /* This duplicates the tests in the andsi3 expander. */
6377 for (i = 9; i <= 31; i++)
6378 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6379 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6380 return COSTS_N_INSNS (2);
6382 else if (outer == ASHIFT || outer == ASHIFTRT
6383 || outer == LSHIFTRT)
6385 return COSTS_N_INSNS (2);
6391 return COSTS_N_INSNS (3);
6409 /* XXX another guess. */
6410 /* Memory costs quite a lot for the first word, but subsequent words
6411 load at the equivalent of a single insn each. */
6412 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6413 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6418 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6424 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6425 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6431 return total + COSTS_N_INSNS (1);
6433 /* Assume a two-shift sequence. Increase the cost slightly so
6434 we prefer actual shifts over an extend operation. */
6435 return total + 1 + COSTS_N_INSNS (2);
6443 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6445 enum machine_mode mode = GET_MODE (x);
6446 enum rtx_code subcode;
6448 enum rtx_code code = GET_CODE (x);
6454 /* Memory costs quite a lot for the first word, but subsequent words
6455 load at the equivalent of a single insn each. */
6456 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6463 if (TARGET_HARD_FLOAT && mode == SFmode)
6464 *total = COSTS_N_INSNS (2);
6465 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6466 *total = COSTS_N_INSNS (4);
6468 *total = COSTS_N_INSNS (20);
6472 if (GET_CODE (XEXP (x, 1)) == REG)
6473 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6474 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6475 *total = rtx_cost (XEXP (x, 1), code, speed);
6481 *total += COSTS_N_INSNS (4);
6486 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6487 *total += rtx_cost (XEXP (x, 0), code, speed);
6490 *total += COSTS_N_INSNS (3);
6494 *total += COSTS_N_INSNS (1);
6495 /* Increase the cost of complex shifts because they aren't any faster,
6496 and reduce dual issue opportunities. */
6497 if (arm_tune_cortex_a9
6498 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6506 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6507 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6508 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6510 *total += rtx_cost (XEXP (x, 1), code, speed);
6514 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6515 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6517 *total += rtx_cost (XEXP (x, 0), code, speed);
6524 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6526 if (TARGET_HARD_FLOAT
6528 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6530 *total = COSTS_N_INSNS (1);
6531 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6532 && arm_const_double_rtx (XEXP (x, 0)))
6534 *total += rtx_cost (XEXP (x, 1), code, speed);
6538 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6539 && arm_const_double_rtx (XEXP (x, 1)))
6541 *total += rtx_cost (XEXP (x, 0), code, speed);
6547 *total = COSTS_N_INSNS (20);
6551 *total = COSTS_N_INSNS (1);
6552 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6553 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6555 *total += rtx_cost (XEXP (x, 1), code, speed);
6559 subcode = GET_CODE (XEXP (x, 1));
6560 if (subcode == ASHIFT || subcode == ASHIFTRT
6561 || subcode == LSHIFTRT
6562 || subcode == ROTATE || subcode == ROTATERT)
6564 *total += rtx_cost (XEXP (x, 0), code, speed);
6565 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6569 /* A shift as a part of RSB costs no more than RSB itself. */
6570 if (GET_CODE (XEXP (x, 0)) == MULT
6571 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6573 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6574 *total += rtx_cost (XEXP (x, 1), code, speed);
6579 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6581 *total += rtx_cost (XEXP (x, 0), code, speed);
6582 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6586 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6587 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6589 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6590 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6591 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6592 *total += COSTS_N_INSNS (1);
6600 if (code == PLUS && arm_arch6 && mode == SImode
6601 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6602 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6604 *total = COSTS_N_INSNS (1);
6605 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6607 *total += rtx_cost (XEXP (x, 1), code, speed);
6611 /* MLA: All arguments must be registers. We filter out
6612 multiplication by a power of two, so that we fall down into the code below. */
6614 if (GET_CODE (XEXP (x, 0)) == MULT
6615 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6617 /* The cost comes from the cost of the multiply. */
6621 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6623 if (TARGET_HARD_FLOAT
6625 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6627 *total = COSTS_N_INSNS (1);
6628 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6629 && arm_const_double_rtx (XEXP (x, 1)))
6631 *total += rtx_cost (XEXP (x, 0), code, speed);
6638 *total = COSTS_N_INSNS (20);
6642 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6643 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6645 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6646 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6647 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6648 *total += COSTS_N_INSNS (1);
6654 case AND: case XOR: case IOR:
6656 /* Normally the frame registers will be split into reg+const during
6657 reload, so it is a bad idea to combine them with other instructions,
6658 since then they might not be moved outside of loops. As a compromise
6659 we allow integration with ops that have a constant as their second operand. */
6661 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6662 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6663 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6664 *total = COSTS_N_INSNS (1);
6668 *total += COSTS_N_INSNS (2);
6669 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6670 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6672 *total += rtx_cost (XEXP (x, 0), code, speed);
6679 *total += COSTS_N_INSNS (1);
6680 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6681 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6683 *total += rtx_cost (XEXP (x, 0), code, speed);
6686 subcode = GET_CODE (XEXP (x, 0));
6687 if (subcode == ASHIFT || subcode == ASHIFTRT
6688 || subcode == LSHIFTRT
6689 || subcode == ROTATE || subcode == ROTATERT)
6691 *total += rtx_cost (XEXP (x, 1), code, speed);
6692 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6697 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6699 *total += rtx_cost (XEXP (x, 1), code, speed);
6700 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6704 if (subcode == UMIN || subcode == UMAX
6705 || subcode == SMIN || subcode == SMAX)
6707 *total = COSTS_N_INSNS (3);
6714 /* This should have been handled by the CPU-specific routines. */
6718 if (arm_arch3m && mode == SImode
6719 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6720 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6721 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6722 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6723 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6724 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6726 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6729 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6733 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6735 if (TARGET_HARD_FLOAT
6737 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6739 *total = COSTS_N_INSNS (1);
6742 *total = COSTS_N_INSNS (2);
6748 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6749 if (mode == SImode && code == NOT)
6751 subcode = GET_CODE (XEXP (x, 0));
6752 if (subcode == ASHIFT || subcode == ASHIFTRT
6753 || subcode == LSHIFTRT
6754 || subcode == ROTATE || subcode == ROTATERT
6756 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6758 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6759 /* Register shifts cost an extra cycle. */
6760 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6761 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6770 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6772 *total = COSTS_N_INSNS (4);
6776 operand = XEXP (x, 0);
6778 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6779 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6780 && GET_CODE (XEXP (operand, 0)) == REG
6781 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6782 *total += COSTS_N_INSNS (1);
6783 *total += (rtx_cost (XEXP (x, 1), code, speed)
6784 + rtx_cost (XEXP (x, 2), code, speed));
6788 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6790 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6796 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6797 && mode == SImode && XEXP (x, 1) == const0_rtx)
6799 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6805 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6806 && mode == SImode && XEXP (x, 1) == const0_rtx)
6808 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6828 /* SCC insns. If the comparison has already been performed,
6829 they cost 2 instructions; otherwise they need an additional
6830 comparison before them. */
6831 *total = COSTS_N_INSNS (2);
6832 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6839 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6845 *total += COSTS_N_INSNS (1);
6846 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6847 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6849 *total += rtx_cost (XEXP (x, 0), code, speed);
6853 subcode = GET_CODE (XEXP (x, 0));
6854 if (subcode == ASHIFT || subcode == ASHIFTRT
6855 || subcode == LSHIFTRT
6856 || subcode == ROTATE || subcode == ROTATERT)
6858 *total += rtx_cost (XEXP (x, 1), code, speed);
6859 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6864 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6866 *total += rtx_cost (XEXP (x, 1), code, speed);
6867 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6877 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6878 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6879 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6880 *total += rtx_cost (XEXP (x, 1), code, speed);
6884 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6886 if (TARGET_HARD_FLOAT
6888 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6890 *total = COSTS_N_INSNS (1);
6893 *total = COSTS_N_INSNS (20);
6896 *total = COSTS_N_INSNS (1);
6898 *total += COSTS_N_INSNS (3);
6904 if (GET_MODE_CLASS (mode) == MODE_INT)
6906 rtx op = XEXP (x, 0);
6907 enum machine_mode opmode = GET_MODE (op);
6910 *total += COSTS_N_INSNS (1);
6912 if (opmode != SImode)
6916 /* If !arm_arch4, we use one of the extendhisi2_mem
6917 or movhi_bytes patterns for HImode. For a QImode
6918 sign extension, we first zero-extend from memory
6919 and then perform a shift sequence. */
6920 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
6921 *total += COSTS_N_INSNS (2);
6924 *total += COSTS_N_INSNS (1);
6926 /* We don't have the necessary insn, so we need to perform some other operation. */
6928 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
6929 /* An and with constant 255. */
6930 *total += COSTS_N_INSNS (1);
6932 /* A shift sequence. Increase costs slightly to avoid
6933 combining two shifts into an extend operation. */
6934 *total += COSTS_N_INSNS (2) + 1;
6940 switch (GET_MODE (XEXP (x, 0)))
6947 *total = COSTS_N_INSNS (1);
6957 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6961 if (const_ok_for_arm (INTVAL (x))
6962 || const_ok_for_arm (~INTVAL (x)))
6963 *total = COSTS_N_INSNS (1);
6965 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6966 INTVAL (x), NULL_RTX,
6973 *total = COSTS_N_INSNS (3);
6977 *total = COSTS_N_INSNS (1);
6981 *total = COSTS_N_INSNS (1);
6982 *total += rtx_cost (XEXP (x, 0), code, speed);
6986 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6987 && (mode == SFmode || !TARGET_VFP_SINGLE))
6988 *total = COSTS_N_INSNS (1);
6990 *total = COSTS_N_INSNS (4);
6994 *total = COSTS_N_INSNS (4);
6999 /* Estimate the size cost of Thumb-1 instructions.
7000 For now most of the code is copied from thumb1_rtx_costs; we need
7001 finer-grained tuning once we have more related test cases. */
7003 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7005 enum machine_mode mode = GET_MODE (x);
7018 return COSTS_N_INSNS (1);
7021 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7023 /* The Thumb-1 mul instruction cannot operate on a constant; we must
7024 load it into a register first. */
7025 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7026 return COSTS_N_INSNS (1) + const_size;
7028 return COSTS_N_INSNS (1);
7031 return (COSTS_N_INSNS (1)
7032 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7033 + (GET_CODE (SET_DEST (x)) == MEM)));
7038 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7039 return COSTS_N_INSNS (1);
7040 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7041 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7042 return COSTS_N_INSNS (2);
7043 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7044 if (thumb_shiftable_const (INTVAL (x)))
7045 return COSTS_N_INSNS (2);
7046 return COSTS_N_INSNS (3);
7048 else if ((outer == PLUS || outer == COMPARE)
7049 && INTVAL (x) < 256 && INTVAL (x) > -256)
7051 else if ((outer == IOR || outer == XOR || outer == AND)
7052 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7053 return COSTS_N_INSNS (1);
7054 else if (outer == AND)
7057 /* This duplicates the tests in the andsi3 expander. */
7058 for (i = 9; i <= 31; i++)
7059 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7060 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7061 return COSTS_N_INSNS (2);
7063 else if (outer == ASHIFT || outer == ASHIFTRT
7064 || outer == LSHIFTRT)
7066 return COSTS_N_INSNS (2);
7072 return COSTS_N_INSNS (3);
7090 /* XXX another guess. */
7091 /* Memory costs quite a lot for the first word, but subsequent words
7092 load at the equivalent of a single insn each. */
7093 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7094 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) ? 4 : 0));
7099 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7104 /* XXX still guessing. */
7105 switch (GET_MODE (XEXP (x, 0)))
7108 return (1 + (mode == DImode ? 4 : 0)
7109 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7112 return (4 + (mode == DImode ? 4 : 0)
7113 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7116 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7127 /* RTX costs when optimizing for size. */
7129 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7132 enum machine_mode mode = GET_MODE (x);
7135 *total = thumb1_size_rtx_costs (x, code, outer_code);
7139 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7143 /* A memory access costs 1 insn if the mode is small, or the address is
7144 a single register; otherwise it costs one insn per word. */
7145 if (REG_P (XEXP (x, 0)))
7146 *total = COSTS_N_INSNS (1);
7148 && GET_CODE (XEXP (x, 0)) == PLUS
7149 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7150 /* This will be split into two instructions.
7151 See arm.md:calculate_pic_address. */
7152 *total = COSTS_N_INSNS (2);
7154 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7161 /* Needs a libcall, so it costs about this. */
7162 *total = COSTS_N_INSNS (2);
7166 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7168 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7176 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7178 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7181 else if (mode == SImode)
7183 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7184 /* Slightly disparage register shifts, but not by much. */
7185 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7186 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7190 /* Needs a libcall. */
7191 *total = COSTS_N_INSNS (2);
7195 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7196 && (mode == SFmode || !TARGET_VFP_SINGLE))
7198 *total = COSTS_N_INSNS (1);
7204 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7205 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7207 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7208 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7209 || subcode1 == ROTATE || subcode1 == ROTATERT
7210 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7211 || subcode1 == ASHIFTRT)
7213 /* It's just the cost of the two operands. */
7218 *total = COSTS_N_INSNS (1);
7222 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7226 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7227 && (mode == SFmode || !TARGET_VFP_SINGLE))
7229 *total = COSTS_N_INSNS (1);
7233 /* A shift as a part of ADD costs nothing. */
7234 if (GET_CODE (XEXP (x, 0)) == MULT
7235 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7237 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7238 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7239 *total += rtx_cost (XEXP (x, 1), code, false);
7244 case AND: case XOR: case IOR:
7247 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7249 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7250 || subcode == LSHIFTRT || subcode == ASHIFTRT
7251 || (code == AND && subcode == NOT))
7253 /* It's just the cost of the two operands. */
7259 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7263 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7267 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7268 && (mode == SFmode || !TARGET_VFP_SINGLE))
7270 *total = COSTS_N_INSNS (1);
7276 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7285 if (cc_register (XEXP (x, 0), VOIDmode))
7288 *total = COSTS_N_INSNS (1);
7292 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7293 && (mode == SFmode || !TARGET_VFP_SINGLE))
7294 *total = COSTS_N_INSNS (1);
7296 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7301 return arm_rtx_costs_1 (x, outer_code, total, 0);
7304 if (const_ok_for_arm (INTVAL (x)))
7305 /* A multiplication by a constant requires another instruction
7306 to load the constant to a register. */
7307 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7309 else if (const_ok_for_arm (~INTVAL (x)))
7310 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7311 else if (const_ok_for_arm (-INTVAL (x)))
7313 if (outer_code == COMPARE || outer_code == PLUS
7314 || outer_code == MINUS)
7317 *total = COSTS_N_INSNS (1);
7320 *total = COSTS_N_INSNS (2);
7326 *total = COSTS_N_INSNS (2);
7330 *total = COSTS_N_INSNS (4);
7335 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7336 cost of these slightly. */
7337 *total = COSTS_N_INSNS (1) + 1;
7341 if (mode != VOIDmode)
7342 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7344 *total = COSTS_N_INSNS (4); /* Who knows? */
7349 /* Top-level RTX cost hook: dispatch to the size costs when optimizing for size, otherwise to the per-core tuned costs. */
7351 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7355 return arm_size_rtx_costs (x, (enum rtx_code) code,
7356 (enum rtx_code) outer_code, total);
7358 return current_tune->rtx_costs (x, (enum rtx_code) code,
7359 (enum rtx_code) outer_code,
7363 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7364 supported on any "slowmul" cores, so it can be ignored. */
7367 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7368 int *total, bool speed)
7370 enum machine_mode mode = GET_MODE (x);
7374 *total = thumb1_rtx_costs (x, code, outer_code);
7381 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7384 *total = COSTS_N_INSNS (20);
7388 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7390 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7391 & (unsigned HOST_WIDE_INT) 0xffffffff);
7392 int cost, const_ok = const_ok_for_arm (i);
7393 int j, booth_unit_size;
7395 /* Tune as appropriate. */
7396 cost = const_ok ? 4 : 8;
7397 booth_unit_size = 2;
7398 for (j = 0; i && j < 32; j += booth_unit_size)
7399 {
7400 i >>= booth_unit_size;
7401 cost++;
7402 }
7404 *total = COSTS_N_INSNS (cost);
7405 *total += rtx_cost (XEXP (x, 0), code, speed);
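/* Worked example (illustrative, using the loop body restored above):
   for XEXP (x, 1) == 0xff, const_ok_for_arm holds so COST starts at 4;
   the loop then shifts I right two bits at a time and increments COST
   four times before I reaches zero, giving COSTS_N_INSNS (8).  */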
7409 *total = COSTS_N_INSNS (20);
7413 return arm_rtx_costs_1 (x, outer_code, total, speed);
7418 /* RTX cost for cores with a fast multiply unit (M variants). */
7421 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7422 int *total, bool speed)
7424 enum machine_mode mode = GET_MODE (x);
7428 *total = thumb1_rtx_costs (x, code, outer_code);
7432 /* ??? Should Thumb-2 use different costs? */
7436 /* There is no point basing this on the tuning, since it is always the
7437 fast variant if it exists at all. */
7439 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7440 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7441 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7443 *total = COSTS_N_INSNS (2);
7450 *total = COSTS_N_INSNS (5);
7454 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7456 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7457 & (unsigned HOST_WIDE_INT) 0xffffffff);
7458 int cost, const_ok = const_ok_for_arm (i);
7459 int j, booth_unit_size;
7461 /* Tune as appropriate. */
7462 cost = const_ok ? 4 : 8;
7463 booth_unit_size = 8;
7464 for (j = 0; i && j < 32; j += booth_unit_size)
7465 {
7466 i >>= booth_unit_size;
7467 cost++;
7468 }
7470 *total = COSTS_N_INSNS (cost);
7476 *total = COSTS_N_INSNS (4);
7480 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7482 if (TARGET_HARD_FLOAT
7484 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7486 *total = COSTS_N_INSNS (1);
7491 /* Requires a libcall. */
7492 *total = COSTS_N_INSNS (20);
7496 return arm_rtx_costs_1 (x, outer_code, total, speed);
7501 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7502 so it can be ignored. */
7505 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7506 int *total, bool speed)
7508 enum machine_mode mode = GET_MODE (x);
7512 *total = thumb1_rtx_costs (x, code, outer_code);
7519 if (GET_CODE (XEXP (x, 0)) != MULT)
7520 return arm_rtx_costs_1 (x, outer_code, total, speed);
7522 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7523 will stall until the multiplication is complete. */
7524 *total = COSTS_N_INSNS (3);
7528 /* There is no point basing this on the tuning, since it is always the
7529 fast variant if it exists at all. */
7531 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7532 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7533 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7535 *total = COSTS_N_INSNS (2);
7542 *total = COSTS_N_INSNS (5);
7546 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7548 /* If operand 1 is a constant we can more accurately
7549 calculate the cost of the multiply. The multiplier can
7550 retire 15 bits on the first cycle and a further 12 on the
7551 second. We do, of course, have to load the constant into
7552 a register first. */
7553 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7554 /* There's a general overhead of one cycle. */
7556 unsigned HOST_WIDE_INT masked_const;
7561 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7563 masked_const = i & 0xffff8000;
7564 if (masked_const != 0)
7567 masked_const = i & 0xf8000000;
7568 if (masked_const != 0)
7571 *total = COSTS_N_INSNS (cost);
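/* Illustrative reading of the masks above (the exact cost increments
   are elided in this extract): I & 0xffff8000 == 0 means every set bit
   fits in the 15 bits retired on the first cycle; otherwise
   I & 0xf8000000 distinguishes constants that still fit in the further
   12 bits of the second cycle from those that need a third.  */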
7577 *total = COSTS_N_INSNS (3);
7581 /* Requires a libcall. */
7582 *total = COSTS_N_INSNS (20);
7586 return arm_rtx_costs_1 (x, outer_code, total, speed);
7591 /* RTX costs for 9e (and later) cores. */
7594 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7595 int *total, bool speed)
7597 enum machine_mode mode = GET_MODE (x);
7604 *total = COSTS_N_INSNS (3);
7608 *total = thumb1_rtx_costs (x, code, outer_code);
7616 /* There is no point basing this on the tuning, since it is always the
7617 fast variant if it exists at all. */
7619 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7620 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7621 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7623 *total = COSTS_N_INSNS (2);
7630 *total = COSTS_N_INSNS (5);
7636 *total = COSTS_N_INSNS (2);
7640 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7642 if (TARGET_HARD_FLOAT
7644 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7646 *total = COSTS_N_INSNS (1);
7651 *total = COSTS_N_INSNS (20);
7655 return arm_rtx_costs_1 (x, outer_code, total, speed);
7658 /* All address computations that can be done are free, but rtx cost returns
7659 the same for practically all of them. So we weight the different types
7660 of address here in the order (most pref first):
7661 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7663 arm_arm_address_cost (rtx x)
7665 enum rtx_code c = GET_CODE (x);
7667 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7669 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7674 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7677 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7687 arm_thumb_address_cost (rtx x)
7689 enum rtx_code c = GET_CODE (x);
7694 && GET_CODE (XEXP (x, 0)) == REG
7695 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7702 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7704 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7707 /* Adjust cost hook for XScale. */
7709 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7711 /* Some true dependencies can have a higher cost depending
7712 on precisely how certain input operands are used. */
7713 if (REG_NOTE_KIND (link) == 0
7714 && recog_memoized (insn) >= 0
7715 && recog_memoized (dep) >= 0)
7717 int shift_opnum = get_attr_shift (insn);
7718 enum attr_type attr_type = get_attr_type (dep);
7720 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7721 operand for INSN. If we have a shifted input operand and the
7722 instruction we depend on is another ALU instruction, then we may
7723 have to account for an additional stall. */
7724 if (shift_opnum != 0
7725 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7727 rtx shifted_operand;
7730 /* Get the shifted operand. */
7731 extract_insn (insn);
7732 shifted_operand = recog_data.operand[shift_opnum];
7734 /* Iterate over all the operands in DEP. If we write an operand
7735 that overlaps with SHIFTED_OPERAND, then we have to increase the
7736 cost of this dependency. */
7738 preprocess_constraints ();
7739 for (opno = 0; opno < recog_data.n_operands; opno++)
7741 /* We can ignore strict inputs. */
7742 if (recog_data.operand_type[opno] == OP_IN)
7745 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7757 /* Adjust cost hook for Cortex A9. */
7759 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7761 switch (REG_NOTE_KIND (link))
7768 case REG_DEP_OUTPUT:
7769 if (recog_memoized (insn) >= 0
7770 && recog_memoized (dep) >= 0)
7772 if (GET_CODE (PATTERN (insn)) == SET)
7775 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7777 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7779 enum attr_type attr_type_insn = get_attr_type (insn);
7780 enum attr_type attr_type_dep = get_attr_type (dep);
7782 /* By default all dependencies of the form
7783 s0 = s0 <op> s1
7784 s0 = s0 <op> s2
7785 have an extra latency of 1 cycle because
7786 of the input and output dependency in this
7787 case. However this gets modeled as a true
7788 dependency and hence all these checks. */
7789 if (REG_P (SET_DEST (PATTERN (insn)))
7790 && REG_P (SET_DEST (PATTERN (dep)))
7791 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7792 SET_DEST (PATTERN (dep))))
7794 /* FMACS is a special case where the dependent
7795 instruction can be issued 3 cycles before
7796 the normal latency in case of an output dependency. */
7798 if ((attr_type_insn == TYPE_FMACS
7799 || attr_type_insn == TYPE_FMACD)
7800 && (attr_type_dep == TYPE_FMACS
7801 || attr_type_dep == TYPE_FMACD))
7803 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7804 *cost = insn_default_latency (dep) - 3;
7806 *cost = insn_default_latency (dep);
7811 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7812 *cost = insn_default_latency (dep) + 1;
7814 *cost = insn_default_latency (dep);
7830 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7831 It corrects the value of COST based on the relationship between
7832 INSN and DEP through the dependence LINK. It returns the new
7833 value. There is a per-core adjust_cost hook to adjust scheduler costs
7834 and the per-core hook can choose to completely override the generic
7835 adjust_cost function. Only put bits of code into arm_adjust_cost that
7836 are common across all cores. */
7838 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7842 /* When generating Thumb-1 code, we want to place flag-setting operations
7843 close to a conditional branch which depends on them, so that we can
7844 omit the comparison. */
7846 && REG_NOTE_KIND (link) == 0
7847 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
7848 && recog_memoized (dep) >= 0
7849 && get_attr_conds (dep) == CONDS_SET)
7852 if (current_tune->sched_adjust_cost != NULL)
7854 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
7858 /* XXX This is not strictly true for the FPA. */
7859 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7860 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7863 /* Call insns don't incur a stall, even if they follow a load. */
7864 if (REG_NOTE_KIND (link) == 0
7865 && GET_CODE (insn) == CALL_INSN)
7868 if ((i_pat = single_set (insn)) != NULL
7869 && GET_CODE (SET_SRC (i_pat)) == MEM
7870 && (d_pat = single_set (dep)) != NULL
7871 && GET_CODE (SET_DEST (d_pat)) == MEM)
7873 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7874 /* This is a load after a store; there is no conflict if the load reads
7875 from a cached area. Assume that loads from the stack, and from the
7876 constant pool are cached, and that others will miss. This is a hack. */
7879 if ((GET_CODE (src_mem) == SYMBOL_REF
7880 && CONSTANT_POOL_ADDRESS_P (src_mem))
7881 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7882 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7883 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7890 static int fp_consts_inited = 0;
7892 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7893 static const char * const strings_fp[8] =
7896 "4", "5", "0.5", "10"
7899 static REAL_VALUE_TYPE values_fp[8];
7902 init_fp_table (void)
7907 if (TARGET_VFP)
7908 fp_consts_inited = 1;
7909 else
7910 fp_consts_inited = 8;
7912 for (i = 0; i < fp_consts_inited; i++)
7914 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7919 /* Return TRUE if rtx X is a valid immediate FP constant. */
7921 arm_const_double_rtx (rtx x)
7926 if (!fp_consts_inited)
7929 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7930 if (REAL_VALUE_MINUS_ZERO (r))
7933 for (i = 0; i < fp_consts_inited; i++)
7934 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7940 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
7942 neg_const_double_rtx_ok_for_fpa (rtx x)
7947 if (!fp_consts_inited)
7950 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7951 r = real_value_negate (&r);
7952 if (REAL_VALUE_MINUS_ZERO (r))
7955 for (i = 0; i < 8; i++)
7956 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7963 /* VFPv3 has a fairly wide range of representable immediates, formed from
7964 "quarter-precision" floating-point values. These can be evaluated using this
7965 formula (with ^ for exponentiation):
7967 valid = (-1)^s * n * 2^-r
7969 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7970 16 <= n <= 31 and 0 <= r <= 7.
7972 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7974 - A (most-significant) is the sign bit.
7975 - BCD are the exponent (encoded as r XOR 3).
7976 - EFGH are the mantissa (encoded as n - 16).
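/* Worked example (illustrative): 1.0 = 16 * 2^-4, i.e. s = 0, n = 16,
   r = 4.  The encoding above gives A = 0, BCD = (4 XOR 3) = 7 and
   EFGH = 16 - 16 = 0, so vfp3_const_double_index below returns
   (0 << 7) | (7 << 4) | 0 = 0x70 for CONST_DOUBLE 1.0.  */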
7979 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7980 fconst[sd] instruction, or -1 if X isn't suitable. */
7982 vfp3_const_double_index (rtx x)
7984 REAL_VALUE_TYPE r, m;
7986 unsigned HOST_WIDE_INT mantissa, mant_hi;
7987 unsigned HOST_WIDE_INT mask;
7988 HOST_WIDE_INT m1, m2;
7989 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7991 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7994 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7996 /* We can't represent these things, so detect them first. */
7997 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8000 /* Extract sign, exponent and mantissa. */
8001 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8002 r = real_value_abs (&r);
8003 exponent = REAL_EXP (&r);
8004 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8005 highest (sign) bit, with a fixed binary point at bit point_pos.
8006 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8007 bits for the mantissa, this may fail (low bits would be lost). */
8008 real_ldexp (&m, &r, point_pos - exponent);
8009 REAL_VALUE_TO_INT (&m1, &m2, m);
8013 /* If there are bits set in the low part of the mantissa, we can't
8014 represent this value. */
8018 /* Now make it so that mantissa contains the most-significant bits, and move
8019 the point_pos to indicate that the least-significant bits have been discarded. */
8021 point_pos -= HOST_BITS_PER_WIDE_INT;
8024 /* We can permit four significant bits of mantissa only, plus a high bit
8025 which is always 1. */
8026 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8027 if ((mantissa & mask) != 0)
8030 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8031 mantissa >>= point_pos - 5;
8033 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8034 floating-point immediate zero with Neon using an integer-zero load, but
8035 that case is handled elsewhere.) */
8039 gcc_assert (mantissa >= 16 && mantissa <= 31);
8041 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8042 normalized significands are in the range [1, 2). (Our mantissa is shifted
8043 left 4 places at this point relative to normalized IEEE754 values). GCC
8044 internally uses [0.5, 1) (see real.c), so the exponent returned from
8045 REAL_EXP must be altered. */
8046 exponent = 5 - exponent;
8048 if (exponent < 0 || exponent > 7)
8051 /* Sign, mantissa and exponent are now in the correct form to plug into the
8052 formula described in the comment above. */
8053 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8056 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8058 vfp3_const_double_rtx (rtx x)
8063 return vfp3_const_double_index (x) != -1;
8066 /* Recognize immediates which can be used in various Neon instructions. Legal
8067 immediates are described by the following table (for VMVN variants, the
8068 bitwise inverse of the constant shown is recognized. In either case, VMOV
8069 is output and the correct instruction to use for a given constant is chosen
8070 by the assembler). The constant shown is replicated across all elements of
8071 the destination vector.
8073 insn elems variant constant (binary)
8074 ---- ----- ------- -----------------
8075 vmov i32 0 00000000 00000000 00000000 abcdefgh
8076 vmov i32 1 00000000 00000000 abcdefgh 00000000
8077 vmov i32 2 00000000 abcdefgh 00000000 00000000
8078 vmov i32 3 abcdefgh 00000000 00000000 00000000
8079 vmov i16 4 00000000 abcdefgh
8080 vmov i16 5 abcdefgh 00000000
8081 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8082 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8083 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8084 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8085 vmvn i16 10 00000000 abcdefgh
8086 vmvn i16 11 abcdefgh 00000000
8087 vmov i32 12 00000000 00000000 abcdefgh 11111111
8088 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8089 vmov i32 14 00000000 abcdefgh 11111111 11111111
8090 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8091 vmov i8 16 abcdefgh
8092 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8093 eeeeeeee ffffffff gggggggg hhhhhhhh
8094 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8096 For case 18, B = !b. Representable values are exactly those accepted by
8097 vfp3_const_double_index, but are output as floating-point numbers rather than as bitmasks.
8100 Variants 0-5 (inclusive) may also be used as immediates for the second
8101 operand of VORR/VBIC instructions.
8103 The INVERSE argument causes the bitwise inverse of the given operand to be
8104 recognized instead (used for recognizing legal immediates for the VAND/VORN
8105 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8106 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8107 output, rather than the real insns vbic/vorr).
8109 INVERSE makes no difference to the recognition of float vectors.
8111 The return value is the variant of immediate as shown in the above table, or
8112 -1 if the given value doesn't match any of the listed patterns.
8115 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8116 rtx *modconst, int *elementwidth)
8118 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8120 for (i = 0; i < idx; i += (STRIDE)) \
8125 immtype = (CLASS); \
8126 elsize = (ELSIZE); \
8130 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8131 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8132 unsigned char bytes[16];
8133 int immtype = -1, matches;
8134 unsigned int invmask = inverse ? 0xff : 0;
8136 /* Vectors of float constants. */
8137 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8139 rtx el0 = CONST_VECTOR_ELT (op, 0);
8142 if (!vfp3_const_double_rtx (el0))
8145 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8147 for (i = 1; i < n_elts; i++)
8149 rtx elt = CONST_VECTOR_ELT (op, i);
8152 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8154 if (!REAL_VALUES_EQUAL (r0, re))
8159 *modconst = CONST_VECTOR_ELT (op, 0);
8167 /* Splat vector constant out into a byte vector. */
8168 for (i = 0; i < n_elts; i++)
8170 rtx el = CONST_VECTOR_ELT (op, i);
8171 unsigned HOST_WIDE_INT elpart;
8172 unsigned int part, parts;
8174 if (GET_CODE (el) == CONST_INT)
8176 elpart = INTVAL (el);
8179 else if (GET_CODE (el) == CONST_DOUBLE)
8181 elpart = CONST_DOUBLE_LOW (el);
8187 for (part = 0; part < parts; part++)
8190 for (byte = 0; byte < innersize; byte++)
8192 bytes[idx++] = (elpart & 0xff) ^ invmask;
8193 elpart >>= BITS_PER_UNIT;
8195 if (GET_CODE (el) == CONST_DOUBLE)
8196 elpart = CONST_DOUBLE_HIGH (el);
8201 gcc_assert (idx == GET_MODE_SIZE (mode));
8205 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8206 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8208 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8209 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8211 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8212 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8214 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8215 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8217 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8219 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8221 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8222 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8224 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8225 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8227 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8228 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8230 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8231 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8233 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8235 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8237 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8238 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8240 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8241 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8243 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8244 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8246 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8247 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8249 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8251 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8252 && bytes[i] == bytes[(i + 8) % idx]);
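/* Worked example (illustrative): a V2SImode constant replicating
   0x0000ab00 splats (little-endian) to the byte sequence
   00 ab 00 00 00 ab 00 00, which fails variant 0 (bytes[1] != 0) but
   matches variant 1 with bytes[1] == 0xab, so IMMTYPE becomes 1 and
   ELSIZE 32.  */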
8260 *elementwidth = elsize;
8264 unsigned HOST_WIDE_INT imm = 0;
8266 /* Un-invert bytes of recognized vector, if necessary. */
8268 for (i = 0; i < idx; i++)
8269 bytes[i] ^= invmask;
8273 /* FIXME: Broken on 32-bit H_W_I hosts. */
8274 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8276 for (i = 0; i < 8; i++)
8277 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8278 << (i * BITS_PER_UNIT);
8280 *modconst = GEN_INT (imm);
8284 unsigned HOST_WIDE_INT imm = 0;
8286 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8287 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8289 *modconst = GEN_INT (imm);
8297 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8298 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8299 float elements), and a modified constant (whatever should be output for a
8300 VMOV) in *MODCONST. */
8303 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8304 rtx *modconst, int *elementwidth)
8308 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8314 *modconst = tmpconst;
8317 *elementwidth = tmpwidth;
8322 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8323 the immediate is valid, write a constant suitable for using as an operand
8324 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8325 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8328 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8329 rtx *modconst, int *elementwidth)
8333 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8335 if (retval < 0 || retval > 5)
8339 *modconst = tmpconst;
8342 *elementwidth = tmpwidth;
8347 /* Return a string suitable for output of Neon immediate logic operation MNEM. */
8351 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8352 int inverse, int quad)
8354 int width, is_valid;
8355 static char templ[40];
8357 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8359 gcc_assert (is_valid != 0);
8362 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8364 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8369 /* Output a sequence of pairwise operations to implement a reduction.
8370 NOTE: We do "too much work" here, because pairwise operations work on two
8371 registers-worth of operands in one go. Unfortunately we don't think we
8372 can exploit those extra calculations to do the full operation in fewer steps.
8373 Although all vector elements of the result but the first are ignored, we
8374 actually calculate the same result in each of the elements. An alternative
8375 such as initially loading a vector with zero to use as each of the second
8376 operands would use up an additional register and take an extra instruction,
8377 for no particular gain. */
8380 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8381 rtx (*reduc) (rtx, rtx, rtx))
8383 enum machine_mode inner = GET_MODE_INNER (mode);
8384 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8387 for (i = parts / 2; i >= 1; i /= 2)
8389 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8390 emit_insn (reduc (dest, tmpsum, tmpsum));
8395 /* If VALS is a vector constant that can be loaded into a register
8396 using VDUP, generate instructions to do so and return an RTX to
8397 assign to the register. Otherwise return NULL_RTX. */
8400 neon_vdup_constant (rtx vals)
8402 enum machine_mode mode = GET_MODE (vals);
8403 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8404 int n_elts = GET_MODE_NUNITS (mode);
8405 bool all_same = true;
8409 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8412 for (i = 0; i < n_elts; ++i)
8414 x = XVECEXP (vals, 0, i);
8415 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8420 /* The elements are not all the same. We could handle repeating
8421 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8422 {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */
8426 /* We can load this constant by using VDUP and a constant in a
8427 single ARM register. This will be cheaper than a vector
8430 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8431 return gen_rtx_VEC_DUPLICATE (mode, x);
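/* Usage sketch (illustrative): for a V4SImode CONST_VECTOR
   { 7, 7, 7, 7 }, the code above moves 7 into an SImode register and
   returns (vec_duplicate:V4SI (reg:SI ...)), which the caller assigns
   to the destination with a single VDUP.  */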
8434 /* Generate code to load VALS, which is a PARALLEL containing only
8435 constants (for vec_init) or CONST_VECTOR, efficiently into a
8436 register. Returns an RTX to copy into the register, or NULL_RTX
8437 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8440 neon_make_constant (rtx vals)
8442 enum machine_mode mode = GET_MODE (vals);
8444 rtx const_vec = NULL_RTX;
8445 int n_elts = GET_MODE_NUNITS (mode);
8449 if (GET_CODE (vals) == CONST_VECTOR)
8451 else if (GET_CODE (vals) == PARALLEL)
8453 /* A CONST_VECTOR must contain only CONST_INTs and
8454 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8455 Only store valid constants in a CONST_VECTOR. */
8456 for (i = 0; i < n_elts; ++i)
8458 rtx x = XVECEXP (vals, 0, i);
8459 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8462 if (n_const == n_elts)
8463 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8468 if (const_vec != NULL
8469 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8470 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8472 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8473 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8474 pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */
8477 else if (const_vec != NULL_RTX)
8478 /* Load from constant pool. On Cortex-A8 this takes two cycles
8479 (for either double or quad vectors). We cannot take advantage
8480 of single-cycle VLD1 because we need a PC-relative addressing
8481 mode. */
8484 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8485 We cannot construct an initializer. */
8489 /* Initialize vector TARGET to VALS. */
8492 neon_expand_vector_init (rtx target, rtx vals)
8494 enum machine_mode mode = GET_MODE (target);
8495 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8496 int n_elts = GET_MODE_NUNITS (mode);
8497 int n_var = 0, one_var = -1;
8498 bool all_same = true;
8502 for (i = 0; i < n_elts; ++i)
8504 x = XVECEXP (vals, 0, i);
8505 if (!CONSTANT_P (x))
8506 ++n_var, one_var = i;
8508 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8514 rtx constant = neon_make_constant (vals);
8515 if (constant != NULL_RTX)
8517 emit_move_insn (target, constant);
8522 /* Splat a single non-constant element if we can. */
8523 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8525 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8526 emit_insn (gen_rtx_SET (VOIDmode, target,
8527 gen_rtx_VEC_DUPLICATE (mode, x)));
8531 /* One field is non-constant. Load constant then overwrite varying
8532 field. This is more efficient than using the stack. */
8535 rtx copy = copy_rtx (vals);
8536 rtx index = GEN_INT (one_var);
8538 /* Load constant part of vector, substitute neighboring value for varying element. */
8540 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8541 neon_expand_vector_init (target, copy);
8543 /* Insert variable. */
8544 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8548 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8551 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8554 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8557 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8560 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8563 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8566 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8569 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8572 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8580 /* Construct the vector in memory one field at a time
8581 and load the whole vector. */
8582 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8583 for (i = 0; i < n_elts; i++)
8584 emit_move_insn (adjust_address_nv (mem, inner_mode,
8585 i * GET_MODE_SIZE (inner_mode)),
8586 XVECEXP (vals, 0, i));
8587 emit_move_insn (target, mem);
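/* Usage sketch (illustrative): initializing { x, 1, 2, 3 } with one
   variable element takes the one_var path above: the constant vector
   { 1, 1, 2, 3 } (the varying slot borrows its neighbour's value) is
   loaded first and X is then inserted with the matching
   gen_neon_vset_lane pattern; only vectors with several variable
   elements fall through to the stack-based loop.  */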
8590 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8591 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8592 reported source locations are bogus. */
8595 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8600 gcc_assert (GET_CODE (operand) == CONST_INT);
8602 lane = INTVAL (operand);
8604 if (lane < low || lane >= high)
8608 /* Bounds-check lanes. */
8611 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8613 bounds_check (operand, low, high, "lane out of range");
8616 /* Bounds-check constants. */
8619 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8621 bounds_check (operand, low, high, "constant out of range");
8625 neon_element_bits (enum machine_mode mode)
8628 return GET_MODE_BITSIZE (mode);
8630 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8634 /* Predicates for `match_operand' and `match_operator'. */
8636 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8638 cirrus_memory_offset (rtx op)
8640 /* Reject eliminable registers. */
8641 if (! (reload_in_progress || reload_completed)
8642 && ( reg_mentioned_p (frame_pointer_rtx, op)
8643 || reg_mentioned_p (arg_pointer_rtx, op)
8644 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8645 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8646 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8647 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8650 if (GET_CODE (op) == MEM)
8656 /* Match: (mem (reg)). */
8657 if (GET_CODE (ind) == REG)
8663 if (GET_CODE (ind) == PLUS
8664 && GET_CODE (XEXP (ind, 0)) == REG
8665 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8666 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8673 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8674 WB is true if full writeback address modes are allowed and is false
8675 if limited writeback address modes (POST_INC and PRE_DEC) are available. */
8679 arm_coproc_mem_operand (rtx op, bool wb)
8683 /* Reject eliminable registers. */
8684 if (! (reload_in_progress || reload_completed)
8685 && ( reg_mentioned_p (frame_pointer_rtx, op)
8686 || reg_mentioned_p (arg_pointer_rtx, op)
8687 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8688 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8689 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8690 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8693 /* Constants are converted into offsets from labels. */
8694 if (GET_CODE (op) != MEM)
8699 if (reload_completed
8700 && (GET_CODE (ind) == LABEL_REF
8701 || (GET_CODE (ind) == CONST
8702 && GET_CODE (XEXP (ind, 0)) == PLUS
8703 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8704 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8707 /* Match: (mem (reg)). */
8708 if (GET_CODE (ind) == REG)
8709 return arm_address_register_rtx_p (ind, 0);
8711 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
8712 acceptable in any case (subject to verification by
8713 arm_address_register_rtx_p). We need WB to be true to accept
8714 PRE_INC and POST_DEC. */
8715 if (GET_CODE (ind) == POST_INC
8716 || GET_CODE (ind) == PRE_DEC
8718 && (GET_CODE (ind) == PRE_INC
8719 || GET_CODE (ind) == POST_DEC)))
8720 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8723 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8724 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8725 && GET_CODE (XEXP (ind, 1)) == PLUS
8726 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8727 ind = XEXP (ind, 1);
8732 if (GET_CODE (ind) == PLUS
8733 && GET_CODE (XEXP (ind, 0)) == REG
8734 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8735 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8736 && INTVAL (XEXP (ind, 1)) > -1024
8737 && INTVAL (XEXP (ind, 1)) < 1024
8738 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
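/* Illustrative arithmetic: the bounds above admit word-aligned offsets
   in [-1020, +1020], i.e. the 8-bit, 4-byte-scaled offset field of
   coprocessor load/store addressing.  */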
8744 /* Return TRUE if OP is a memory operand which we can load or store a vector
8745 to/from. TYPE is one of the following values:
8746 0 - Vector load/store (vldr)
8747 1 - Core registers (ldm)
8748 2 - Element/structure loads (vld1)
8751 neon_vector_mem_operand (rtx op, int type)
8755 /* Reject eliminable registers. */
8756 if (! (reload_in_progress || reload_completed)
8757 && ( reg_mentioned_p (frame_pointer_rtx, op)
8758 || reg_mentioned_p (arg_pointer_rtx, op)
8759 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8760 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8761 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8762 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8765 /* Constants are converted into offsets from labels. */
8766 if (GET_CODE (op) != MEM)
8771 if (reload_completed
8772 && (GET_CODE (ind) == LABEL_REF
8773 || (GET_CODE (ind) == CONST
8774 && GET_CODE (XEXP (ind, 0)) == PLUS
8775 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8776 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8779 /* Match: (mem (reg)). */
8780 if (GET_CODE (ind) == REG)
8781 return arm_address_register_rtx_p (ind, 0);
8783 /* Allow post-increment (and pre-decrement) with Neon registers. */
8784 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8785 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8787 /* FIXME: vld1 allows register post-modify. */
8793 && GET_CODE (ind) == PLUS
8794 && GET_CODE (XEXP (ind, 0)) == REG
8795 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8796 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8797 && INTVAL (XEXP (ind, 1)) > -1024
8798 && INTVAL (XEXP (ind, 1)) < 1016
8799 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
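/* Illustrative arithmetic: here the admissible word-aligned offsets
   span [-1020, +1012]; the exclusive upper bound of 1016 is slightly
   tighter than the coprocessor range accepted earlier.  */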
8805 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */
8808 neon_struct_mem_operand (rtx op)
8812 /* Reject eliminable registers. */
8813 if (! (reload_in_progress || reload_completed)
8814 && ( reg_mentioned_p (frame_pointer_rtx, op)
8815 || reg_mentioned_p (arg_pointer_rtx, op)
8816 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8817 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8818 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8819 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8822 /* Constants are converted into offsets from labels. */
8823 if (GET_CODE (op) != MEM)
8828 if (reload_completed
8829 && (GET_CODE (ind) == LABEL_REF
8830 || (GET_CODE (ind) == CONST
8831 && GET_CODE (XEXP (ind, 0)) == PLUS
8832 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8833 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8836 /* Match: (mem (reg)). */
8837 if (GET_CODE (ind) == REG)
8838 return arm_address_register_rtx_p (ind, 0);
8843 /* Return true if X is a register that will be eliminated later on. */
8845 arm_eliminable_register (rtx x)
8847 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8848 || REGNO (x) == ARG_POINTER_REGNUM
8849 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8850 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8853 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
8854 coprocessor registers. Otherwise return NO_REGS. */
8857 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8861 if (!TARGET_NEON_FP16)
8862 return GENERAL_REGS;
8863 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8865 return GENERAL_REGS;
8869 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8870 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8871 && neon_vector_mem_operand (x, 0))
8874 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8877 return GENERAL_REGS;
8880 /* Values which must be returned in the most-significant end of the return register. */
8884 arm_return_in_msb (const_tree valtype)
8886 return (TARGET_AAPCS_BASED
8888 && (AGGREGATE_TYPE_P (valtype)
8889 || TREE_CODE (valtype) == COMPLEX_TYPE));
8892 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8893 Used by the Cirrus Maverick code, which has to work around
8894 a hardware bug triggered by such instructions. */
8896 arm_memory_load_p (rtx insn)
8898 rtx body, lhs, rhs;
8900 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8903 body = PATTERN (insn);
8905 if (GET_CODE (body) != SET)
8908 lhs = XEXP (body, 0);
8909 rhs = XEXP (body, 1);
8911 lhs = REG_OR_SUBREG_RTX (lhs);
8913 /* If the destination is not a general purpose
8914 register we do not have to worry. */
8915 if (GET_CODE (lhs) != REG
8916 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8919 /* As well as loads from memory we also have to react
8920 to loads of invalid constants which will be turned
8921 into loads from the minipool. */
8922 return (GET_CODE (rhs) == MEM
8923 || GET_CODE (rhs) == SYMBOL_REF
8924 || note_invalid_constants (insn, -1, false));
8927 /* Return TRUE if INSN is a Cirrus instruction. */
8929 arm_cirrus_insn_p (rtx insn)
8931 enum attr_cirrus attr;
8933 /* get_attr cannot accept USE or CLOBBER. */
8935 || GET_CODE (insn) != INSN
8936 || GET_CODE (PATTERN (insn)) == USE
8937 || GET_CODE (PATTERN (insn)) == CLOBBER)
8940 attr = get_attr_cirrus (insn);
8942 return attr != CIRRUS_NOT;
8945 /* Cirrus reorg for invalid instruction combinations. */
8947 cirrus_reorg (rtx first)
8949 enum attr_cirrus attr;
8950 rtx body = PATTERN (first);
8954 /* Any branch must be followed by two non-Cirrus instructions. */
8955 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8958 t = next_nonnote_insn (first);
8960 if (arm_cirrus_insn_p (t))
8963 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8967 emit_insn_after (gen_nop (), first);
8972 /* (float (blah)) is in parallel with a clobber. */
8973 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8974 body = XVECEXP (body, 0, 0);
8976 if (GET_CODE (body) == SET)
8978 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8980 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8981 be followed by a non-Cirrus insn. */
8982 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8984 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8985 emit_insn_after (gen_nop (), first);
8989 else if (arm_memory_load_p (first))
8991 unsigned int arm_regno;
8993 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8994 ldr/cfmv64hr combination where the Rd field is the same
8995 in both instructions must be split with a non-Cirrus
9002 /* Get Arm register number for ldr insn. */
9003 if (GET_CODE (lhs) == REG)
9004 arm_regno = REGNO (lhs);
9007 gcc_assert (GET_CODE (rhs) == REG);
9008 arm_regno = REGNO (rhs);
9012 first = next_nonnote_insn (first);
9014 if (! arm_cirrus_insn_p (first))
9017 body = PATTERN (first);
9019 /* (float (blah)) is in parallel with a clobber. */
9020 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9021 body = XVECEXP (body, 0, 0);
9023 if (GET_CODE (body) == FLOAT)
9024 body = XEXP (body, 0);
9026 if (get_attr_cirrus (first) == CIRRUS_MOVE
9027 && GET_CODE (XEXP (body, 1)) == REG
9028 && arm_regno == REGNO (XEXP (body, 1)))
9029 emit_insn_after (gen_nop (), first);
9035 /* get_attr cannot accept USE or CLOBBER. */
9037 || GET_CODE (first) != INSN
9038 || GET_CODE (PATTERN (first)) == USE
9039 || GET_CODE (PATTERN (first)) == CLOBBER)
9042 attr = get_attr_cirrus (first);
9044 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9045 must be followed by a non-coprocessor instruction. */
9046 if (attr == CIRRUS_COMPARE)
9050 t = next_nonnote_insn (first);
9052 if (arm_cirrus_insn_p (t))
9055 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9059 emit_insn_after (gen_nop (), first);
9065 /* Return TRUE if X references a SYMBOL_REF. */
9067 symbol_mentioned_p (rtx x)
9072 if (GET_CODE (x) == SYMBOL_REF)
9075 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9076 are constant offsets, not symbols. */
9077 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9080 fmt = GET_RTX_FORMAT (GET_CODE (x));
9082 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9088 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9089 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9092 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9099 /* Return TRUE if X references a LABEL_REF. */
9101 label_mentioned_p (rtx x)
9106 if (GET_CODE (x) == LABEL_REF)
9109 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9110 instruction, but they are constant offsets, not symbols. */
9111 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9114 fmt = GET_RTX_FORMAT (GET_CODE (x));
9115 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9121 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9122 if (label_mentioned_p (XVECEXP (x, i, j)))
9125 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9133 tls_mentioned_p (rtx x)
9135 switch (GET_CODE (x))
9138 return tls_mentioned_p (XEXP (x, 0));
9141 if (XINT (x, 1) == UNSPEC_TLS)
9149 /* Must not copy any rtx that uses a pc-relative address. */
9152 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9154 if (GET_CODE (*x) == UNSPEC
9155 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9161 arm_cannot_copy_insn_p (rtx insn)
9163 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9169 enum rtx_code code = GET_CODE (x);
9186 /* Return 1 if memory locations are adjacent. */
9188 adjacent_mem_locations (rtx a, rtx b)
9190 /* We don't guarantee to preserve the order of these memory refs. */
9191 if (volatile_refs_p (a) || volatile_refs_p (b))
9194 if ((GET_CODE (XEXP (a, 0)) == REG
9195 || (GET_CODE (XEXP (a, 0)) == PLUS
9196 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9197 && (GET_CODE (XEXP (b, 0)) == REG
9198 || (GET_CODE (XEXP (b, 0)) == PLUS
9199 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9201 HOST_WIDE_INT val0 = 0, val1 = 0;
9205 if (GET_CODE (XEXP (a, 0)) == PLUS)
9207 reg0 = XEXP (XEXP (a, 0), 0);
9208 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9213 if (GET_CODE (XEXP (b, 0)) == PLUS)
9215 reg1 = XEXP (XEXP (b, 0), 0);
9216 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9221 /* Don't accept any offset that will require multiple
9222 instructions to handle, since this would cause the
9223 arith_adjacentmem pattern to output an overlong sequence. */
9224 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9227 /* Don't allow an eliminable register: register elimination can make
9228 the offset too large. */
9229 if (arm_eliminable_register (reg0))
9232 val_diff = val1 - val0;
9236 /* If the target has load delay slots, then there's no benefit
9237 to using an ldm instruction unless the offset is zero and
9238 we are optimizing for size. */
9239 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9240 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9241 && (val_diff == 4 || val_diff == -4));
9244 return ((REGNO (reg0) == REGNO (reg1))
9245 && (val_diff == 4 || val_diff == -4));
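/* For example, the pair

     ldr rd1, [rbase, #0]
     ldr rd2, [rbase, #4]

   passes the tests above: both addresses use the same base register,
   neither offset needs multiple instructions to materialize, and the
   offsets differ by exactly 4, so the pair is a candidate for being
   rewritten as a single "ldmia rbase, {rd1, rd2}" by the patterns
   that call this predicate.  */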
9251 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9252 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9253 instruction. ADD_OFFSET is nonzero if the base address register needs
9254 to be modified with an add instruction before we can use it. */
9257 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9258 int nops, HOST_WIDE_INT add_offset)
9260 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9261 if the offset isn't small enough. The reason 2 ldrs are faster
9262 is because these ARMs are able to do more than one cache access
9263 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9264 whilst the ARM8 has a double bandwidth cache. This means that
9265 these cores can do both an instruction fetch and a data fetch in
9266 a single cycle, so the trick of calculating the address into a
9267 scratch register (one of the result regs) and then doing a load
9268 multiple actually becomes slower (and no smaller in code size).
9269 That is the transformation
9271 ldr rd1, [rbase + offset]
9272 ldr rd2, [rbase + offset + 4]
9276 add rd1, rbase, offset
9277 ldmia rd1, {rd1, rd2}
9279 produces worse code -- '3 cycles + any stalls on rd2' instead of
9280 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9281 access per cycle, the first sequence could never complete in less
9282 than 6 cycles, whereas the ldm sequence would only take 5 and
9283 would make better use of sequential accesses if not hitting the
9286 We cheat here and test 'arm_ld_sched' which we currently know to
9287 only be true for the ARM8, ARM9 and StrongARM. If this ever
9288 changes, then the test below needs to be reworked. */
9289 if (nops == 2 && arm_ld_sched && add_offset != 0)
9292 /* XScale has load-store double instructions, but they have stricter
alignment requirements than load-store multiple, so we cannot
use them here.

For XScale ldm requires 2 + NREGS cycles to complete and blocks
the pipeline until completion (so 1 reg takes 3 cycles, 2 regs
take 4, 3 take 5 and 4 take 6).

An ldr instruction takes 1-3 cycles, but does not block the
pipeline (so NREGS ldrs take between NREGS and 3 * NREGS cycles
in total).

Best case ldr will always win.  However, the more ldr instructions
we issue, the less likely we are to be able to schedule them well.
Using ldr instructions also increases code size.
9318 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9319 for counts of 3 or 4 regs. */
9320 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9325 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9326 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9327 an array ORDER which describes the sequence to use when accessing the
9328 offsets that produces an ascending order. In this sequence, each
9329 offset must be larger by exactly 4 than the previous one. ORDER[0]
9330 must have been filled in with the lowest offset by the caller.
9331 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9332 we use to verify that ORDER produces an ascending order of registers.
9333 Return true if it was possible to construct such an order, false if
9337 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9341 for (i = 1; i < nops; i++)
9345 order[i] = order[i - 1];
9346 for (j = 0; j < nops; j++)
9347 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9349 /* We must find exactly one offset that is higher than the
9350 previous one by 4. */
9351 if (order[i] != order[i - 1])
9355 if (order[i] == order[i - 1])
9357 /* The register numbers must be ascending. */
9358 if (unsorted_regs != NULL
9359 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
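/* A worked example: with NOPS == 4 and UNSORTED_OFFSETS == {8, 4, 0,
   12}, the caller seeds ORDER[0] = 2 (the offset 0), and the loop
   above fills in ORDER = {2, 1, 0, 3}, i.e. offsets 0, 4, 8, 12.
   Sets such as {0, 4, 4, 8} or {0, 4, 12, 16} are rejected, because
   each step must find exactly one offset that is higher than the
   previous one by 4.  */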
9365 /* Used to determine in a peephole whether a sequence of load
9366 instructions can be changed into a load-multiple instruction.
9367 NOPS is the number of separate load instructions we are examining. The
9368 first NOPS entries in OPERANDS are the destination registers, the
9369 next NOPS entries are memory operands. If this function is
9370 successful, *BASE is set to the common base register of the memory
9371 accesses; *LOAD_OFFSET is set to the first memory location's offset
9372 from that base register.
9373 REGS is an array filled in with the destination register numbers.
9374 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9375 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
9376 the sequence of registers in REGS matches the loads from ascending memory
9377 locations, and the function verifies that the register numbers are
9378 themselves ascending. If CHECK_REGS is false, the register numbers
9379 are stored in the order they are found in the operands. */
9381 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9382 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9384 int unsorted_regs[MAX_LDM_STM_OPS];
9385 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9386 int order[MAX_LDM_STM_OPS];
9387 rtx base_reg_rtx = NULL;
9391 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9392 easily extended if required. */
9393 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9395 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9397 /* Loop over the operands and check that the memory references are
9398 suitable (i.e. immediate offsets from the same base register). At
9399 the same time, extract the target register, and the memory
9401 for (i = 0; i < nops; i++)
9406 /* Convert a subreg of a mem into the mem itself. */
9407 if (GET_CODE (operands[nops + i]) == SUBREG)
9408 operands[nops + i] = alter_subreg (operands + (nops + i));
9410 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9412 /* Don't reorder volatile memory references; it doesn't seem worth
9413 looking for the case where the order is ok anyway. */
9414 if (MEM_VOLATILE_P (operands[nops + i]))
9417 offset = const0_rtx;
9419 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9420 || (GET_CODE (reg) == SUBREG
9421 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9422 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9423 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9425 || (GET_CODE (reg) == SUBREG
9426 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9427 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9432 base_reg = REGNO (reg);
9434 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9437 else if (base_reg != (int) REGNO (reg))
9438 /* Not addressed from the same base register. */
9441 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9442 ? REGNO (operands[i])
9443 : REGNO (SUBREG_REG (operands[i])));
9445 /* If it isn't an integer register, or if it overwrites the
9446 base register but isn't the last insn in the list, then
9447 we can't do this. */
9448 if (unsorted_regs[i] < 0
9449 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9450 || unsorted_regs[i] > 14
9451 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9454 unsorted_offsets[i] = INTVAL (offset);
9455 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9459 /* Not a suitable memory address. */
9463 /* All the useful information has now been extracted from the
9464 operands into unsorted_regs and unsorted_offsets; additionally,
9465 order[0] has been set to the lowest offset in the list. Sort
9466 the offsets into order, verifying that they are adjacent, and
9467 check that the register numbers are ascending. */
9468 if (!compute_offset_order (nops, unsorted_offsets, order,
9469 check_regs ? unsorted_regs : NULL))
9473 memcpy (saved_order, order, sizeof order);
9479 for (i = 0; i < nops; i++)
9480 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9482 *load_offset = unsorted_offsets[order[0]];
9486 && !peep2_reg_dead_p (nops, base_reg_rtx))
9489 if (unsorted_offsets[order[0]] == 0)
9490 ldm_case = 1; /* ldmia */
9491 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9492 ldm_case = 2; /* ldmib */
9493 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9494 ldm_case = 3; /* ldmda */
9495 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9496 ldm_case = 4; /* ldmdb */
9497 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9498 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9503 if (!multiple_operation_profitable_p (false, nops,
9505 ? unsorted_offsets[order[0]] : 0))
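/* To illustrate the LDM_CASE values for a two-register sequence on
   base rb: case 1 (ldmia) corresponds to offsets {0, 4}, case 2
   (ldmib, ARM only) to {4, 8}, case 3 (ldmda, ARM only) to {-4, 0},
   and case 4 (ldmdb) to {-8, -4}.  Case 5 is the fallback in which
   the lowest offset is simply a constant we can first add into a
   scratch register.  */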
9511 /* Used to determine in a peephole whether a sequence of store instructions can
9512 be changed into a store-multiple instruction.
9513 NOPS is the number of separate store instructions we are examining.
9514 NOPS_TOTAL is the total number of instructions recognized by the peephole
9516 The first NOPS entries in OPERANDS are the source registers, the next
9517 NOPS entries are memory operands. If this function is successful, *BASE is
9518 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9519 to the first memory location's offset from that base register. REGS is an
9520 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9521 likewise filled with the corresponding rtx's.
9522 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9523 numbers to an ascending order of stores.
9524 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9525 from ascending memory locations, and the function verifies that the register
9526 numbers are themselves ascending. If CHECK_REGS is false, the register
9527 numbers are stored in the order they are found in the operands. */
9529 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9530 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9531 HOST_WIDE_INT *load_offset, bool check_regs)
9533 int unsorted_regs[MAX_LDM_STM_OPS];
9534 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9535 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9536 int order[MAX_LDM_STM_OPS];
9538 rtx base_reg_rtx = NULL;
9541 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9542 easily extended if required. */
9543 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9545 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9547 /* Loop over the operands and check that the memory references are
9548 suitable (i.e. immediate offsets from the same base register). At
9549 the same time, extract the target register, and the memory
9551 for (i = 0; i < nops; i++)
9556 /* Convert a subreg of a mem into the mem itself. */
9557 if (GET_CODE (operands[nops + i]) == SUBREG)
9558 operands[nops + i] = alter_subreg (operands + (nops + i));
9560 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9562 /* Don't reorder volatile memory references; it doesn't seem worth
9563 looking for the case where the order is ok anyway. */
9564 if (MEM_VOLATILE_P (operands[nops + i]))
9567 offset = const0_rtx;
9569 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9570 || (GET_CODE (reg) == SUBREG
9571 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9572 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9573 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9575 || (GET_CODE (reg) == SUBREG
9576 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9577 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9580 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9581 ? operands[i] : SUBREG_REG (operands[i]));
9582 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9586 base_reg = REGNO (reg);
9588 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9591 else if (base_reg != (int) REGNO (reg))
9592 /* Not addressed from the same base register. */
9595 /* If it isn't an integer register, then we can't do this. */
9596 if (unsorted_regs[i] < 0
9597 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9598 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9599 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9600 || unsorted_regs[i] > 14)
9603 unsorted_offsets[i] = INTVAL (offset);
9604 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9608 /* Not a suitable memory address. */
9612 /* All the useful information has now been extracted from the
9613 operands into unsorted_regs and unsorted_offsets; additionally,
9614 order[0] has been set to the lowest offset in the list. Sort
9615 the offsets into order, verifying that they are adjacent, and
9616 check that the register numbers are ascending. */
9617 if (!compute_offset_order (nops, unsorted_offsets, order,
9618 check_regs ? unsorted_regs : NULL))
9622 memcpy (saved_order, order, sizeof order);
9628 for (i = 0; i < nops; i++)
9630 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9632 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9635 *load_offset = unsorted_offsets[order[0]];
9639 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9642 if (unsorted_offsets[order[0]] == 0)
9643 stm_case = 1; /* stmia */
9644 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9645 stm_case = 2; /* stmib */
9646 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9647 stm_case = 3; /* stmda */
9648 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9649 stm_case = 4; /* stmdb */
9653 if (!multiple_operation_profitable_p (false, nops, 0))
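/* The STM_CASE values mirror the LDM cases above: 1 is stmia (lowest
   offset 0), 2 stmib, 3 stmda and 4 stmdb.  There is no analogue of
   ldm case 5, since the load trick of computing the address into one
   of the soon-to-be-overwritten destination registers does not work
   when the registers hold the data being stored.  */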
9659 /* Routines for use in generating RTL. */
9661 /* Generate a load-multiple instruction. COUNT is the number of loads in
9662 the instruction; REGS and MEMS are arrays containing the operands.
9663 BASEREG is the base register to be used in addressing the memory operands.
9664 WBACK_OFFSET is nonzero if the instruction should update the base
9668 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9669 HOST_WIDE_INT wback_offset)
9674 if (!multiple_operation_profitable_p (false, count, 0))
9680 for (i = 0; i < count; i++)
9681 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9683 if (wback_offset != 0)
9684 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9692 result = gen_rtx_PARALLEL (VOIDmode,
9693 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9694 if (wback_offset != 0)
9696 XVECEXP (result, 0, 0)
9697 = gen_rtx_SET (VOIDmode, basereg,
9698 plus_constant (basereg, wback_offset));
9703 for (j = 0; i < count; i++, j++)
9704 XVECEXP (result, 0, i)
9705 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
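/* Schematically, for COUNT == 2 with a writeback offset of 8, the
   PARALLEL built above has the shape

     (parallel
       [(set (reg rb) (plus (reg rb) (const_int 8)))
        (set (reg rd1) (mem (reg rb)))
        (set (reg rd2) (mem (plus (reg rb) (const_int 4))))])

   (register names illustrative; the addresses are whatever the caller
   put into MEMS).  The writeback set, when present, is always vector
   element 0.  */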
9710 /* Generate a store-multiple instruction. COUNT is the number of stores in
9711 the instruction; REGS and MEMS are arrays containing the operands.
9712 BASEREG is the base register to be used in addressing the memory operands.
9713 WBACK_OFFSET is nonzero if the instruction should update the base
9717 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9718 HOST_WIDE_INT wback_offset)
9723 if (GET_CODE (basereg) == PLUS)
9724 basereg = XEXP (basereg, 0);
9726 if (!multiple_operation_profitable_p (false, count, 0))
9732 for (i = 0; i < count; i++)
9733 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9735 if (wback_offset != 0)
9736 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9744 result = gen_rtx_PARALLEL (VOIDmode,
9745 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9746 if (wback_offset != 0)
9748 XVECEXP (result, 0, 0)
9749 = gen_rtx_SET (VOIDmode, basereg,
9750 plus_constant (basereg, wback_offset));
9755 for (j = 0; i < count; i++, j++)
9756 XVECEXP (result, 0, i)
9757 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9762 /* Generate either a load-multiple or a store-multiple instruction. This
9763 function can be used in situations where we can start with a single MEM
9764 rtx and adjust its address upwards.
9765 COUNT is the number of operations in the instruction, not counting a
9766 possible update of the base register.  REGS is an array containing the register numbers to be loaded or stored.
9768 BASEREG is the base register to be used in addressing the memory operands,
9769 which are constructed from BASEMEM.
9770 WRITE_BACK specifies whether the generated instruction should include an
9771 update of the base register.
9772 OFFSETP is used to pass an offset to and from this function; this offset
9773 is not used when constructing the address (instead BASEMEM should have an
9774 appropriate offset in its address), it is used only for setting
9775 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
9778 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9779 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9781 rtx mems[MAX_LDM_STM_OPS];
9782 HOST_WIDE_INT offset = *offsetp;
9785 gcc_assert (count <= MAX_LDM_STM_OPS);
9787 if (GET_CODE (basereg) == PLUS)
9788 basereg = XEXP (basereg, 0);
9790 for (i = 0; i < count; i++)
9792 rtx addr = plus_constant (basereg, i * 4);
9793 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9801 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9802 write_back ? 4 * count : 0);
9804 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9805 write_back ? 4 * count : 0);
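/* A sketch of a typical internal use (register numbers illustrative):

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT offset = 0;
     emit_insn (arm_gen_multiple_op (TRUE, regs, 4, basereg,
                                     TRUE, basemem, &offset));

   which emits a four-register load with base writeback and, because
   WRITE_BACK is true, advances OFFSET by 16 ready for a following
   block.  */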
9809 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9810 rtx basemem, HOST_WIDE_INT *offsetp)
9812 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9817 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9818 rtx basemem, HOST_WIDE_INT *offsetp)
9820 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9824 /* Called from a peephole2 expander to turn a sequence of loads into an
9825 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9826 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9827 is true if we can reorder the registers because they are used commutatively
9829 Returns true iff we could generate a new instruction. */
9832 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9834 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9835 rtx mems[MAX_LDM_STM_OPS];
9838 HOST_WIDE_INT offset;
9839 int write_back = FALSE;
9843 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9844 &base_reg, &offset, !sort_regs);
9850 for (i = 0; i < nops - 1; i++)
9851 for (j = i + 1; j < nops; j++)
9852 if (regs[i] > regs[j])
9858 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9862 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9863 gcc_assert (ldm_case == 1 || ldm_case == 5);
9869 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
9870 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
9875 base_reg_rtx = newbase;
9879 for (i = 0; i < nops; i++)
9881 addr = plus_constant (base_reg_rtx, offset + i * 4);
9882 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9885 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
9886 write_back ? offset + i * 4 : 0));
9890 /* Called from a peephole2 expander to turn a sequence of stores into an
9891 STM instruction. OPERANDS are the operands found by the peephole matcher;
9892 NOPS indicates how many separate stores we are trying to combine.
9893 Returns true iff we could generate a new instruction. */
9896 gen_stm_seq (rtx *operands, int nops)
9899 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9900 rtx mems[MAX_LDM_STM_OPS];
9903 HOST_WIDE_INT offset;
9904 int write_back = FALSE;
9909 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
9910 mem_order, &base_reg, &offset, true);
9915 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9917 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
9920 gcc_assert (base_reg_dies);
9926 gcc_assert (base_reg_dies);
9927 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
9931 addr = plus_constant (base_reg_rtx, offset);
9933 for (i = 0; i < nops; i++)
9935 addr = plus_constant (base_reg_rtx, offset + i * 4);
9936 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
9939 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
9940 write_back ? offset + i * 4 : 0));
9944 /* Called from a peephole2 expander to turn a sequence of stores that are
9945 preceded by constant loads into an STM instruction. OPERANDS are the
9946 operands found by the peephole matcher; NOPS indicates how many
9947 separate stores we are trying to combine; there are 2 * NOPS
9948 instructions in the peephole.
9949 Returns true iff we could generate a new instruction. */
9952 gen_const_stm_seq (rtx *operands, int nops)
9954 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
9955 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9956 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
9957 rtx mems[MAX_LDM_STM_OPS];
9960 HOST_WIDE_INT offset;
9961 int write_back = FALSE;
9966 HARD_REG_SET allocated;
9968 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
9969 mem_order, &base_reg, &offset, false);
9974 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
9976 /* If the same register is used more than once, try to find a free
9978 CLEAR_HARD_REG_SET (allocated);
9979 for (i = 0; i < nops; i++)
9981 for (j = i + 1; j < nops; j++)
9982 if (regs[i] == regs[j])
9984 rtx t = peep2_find_free_register (0, nops * 2,
9985 TARGET_THUMB1 ? "l" : "r",
9986 SImode, &allocated);
9990 regs[i] = REGNO (t);
9994 /* Compute an ordering that maps the register numbers to an ascending
9997 for (i = 0; i < nops; i++)
9998 if (regs[i] < regs[reg_order[0]])
10001 for (i = 1; i < nops; i++)
10003 int this_order = reg_order[i - 1];
10004 for (j = 0; j < nops; j++)
10005 if (regs[j] > regs[reg_order[i - 1]]
10006 && (this_order == reg_order[i - 1]
10007 || regs[j] < regs[this_order]))
10009 reg_order[i] = this_order;
10012 /* Ensure that registers that must be live after the instruction end
10013 up with the correct value. */
10014 for (i = 0; i < nops; i++)
10016 int this_order = reg_order[i];
10017 if ((this_order != mem_order[i]
10018 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10019 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10023 /* Load the constants. */
10024 for (i = 0; i < nops; i++)
10026 rtx op = operands[2 * nops + mem_order[i]];
10027 sorted_regs[i] = regs[reg_order[i]];
10028 emit_move_insn (reg_rtxs[reg_order[i]], op);
10031 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10033 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10036 gcc_assert (base_reg_dies);
10042 gcc_assert (base_reg_dies);
10043 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10047 addr = plus_constant (base_reg_rtx, offset);
10049 for (i = 0; i < nops; i++)
10051 addr = plus_constant (base_reg_rtx, offset + i * 4);
10052 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10055 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10056 write_back ? offset + i * 4 : 0));
10061 arm_gen_movmemqi (rtx *operands)
10063 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10064 HOST_WIDE_INT srcoffset, dstoffset;
10066 rtx src, dst, srcbase, dstbase;
10067 rtx part_bytes_reg = NULL;
10070 if (GET_CODE (operands[2]) != CONST_INT
10071 || GET_CODE (operands[3]) != CONST_INT
10072 || INTVAL (operands[2]) > 64
10073 || INTVAL (operands[3]) & 3)
10076 dstbase = operands[0];
10077 srcbase = operands[1];
10079 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10080 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10082 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10083 out_words_to_go = INTVAL (operands[2]) / 4;
10084 last_bytes = INTVAL (operands[2]) & 3;
10085 dstoffset = srcoffset = 0;
10087 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10088 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10090 for (i = 0; in_words_to_go >= 2; i += 4)
10092 if (in_words_to_go > 4)
10093 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10094 TRUE, srcbase, &srcoffset));
10096 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10097 src, FALSE, srcbase,
10100 if (out_words_to_go)
10102 if (out_words_to_go > 4)
10103 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10104 TRUE, dstbase, &dstoffset));
10105 else if (out_words_to_go != 1)
10106 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10107 out_words_to_go, dst,
10110 dstbase, &dstoffset));
10113 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10114 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10115 if (last_bytes != 0)
10117 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10123 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10124 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10127 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10128 if (out_words_to_go)
10132 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10133 sreg = copy_to_reg (mem);
10135 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10136 emit_move_insn (mem, sreg);
10139 gcc_assert (!in_words_to_go); /* Sanity check */
10142 if (in_words_to_go)
10144 gcc_assert (in_words_to_go > 0);
10146 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10147 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10150 gcc_assert (!last_bytes || part_bytes_reg);
10152 if (BYTES_BIG_ENDIAN && last_bytes)
10154 rtx tmp = gen_reg_rtx (SImode);
10156 /* The bytes we want are in the top end of the word. */
10157 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10158 GEN_INT (8 * (4 - last_bytes))));
10159 part_bytes_reg = tmp;
10163 mem = adjust_automodify_address (dstbase, QImode,
10164 plus_constant (dst, last_bytes - 1),
10165 dstoffset + last_bytes - 1);
10166 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10170 tmp = gen_reg_rtx (SImode);
10171 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10172 part_bytes_reg = tmp;
10179 if (last_bytes > 1)
10181 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10182 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10186 rtx tmp = gen_reg_rtx (SImode);
10187 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10188 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10189 part_bytes_reg = tmp;
10196 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10197 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
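/* An illustrative run: for a 10-byte copy (operands[2] == 10) we get
   in_words_to_go == 3, out_words_to_go == 2 and last_bytes == 2.  The
   loop above loads all three words with one ldm (into r0-r2, assuming
   arm_regs_in_sequence starts at r0), stores the first two with an
   stm, leaves the third in part_bytes_reg (r2), and the trailing two
   bytes then reach memory via the HImode store above (little-endian
   case; the big-endian path shifts and stores individual bytes).  */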
10204 /* Select a dominance comparison mode if possible for a test of the general
10205 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10206 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10207 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10208 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10209 In all cases OP will be either EQ or NE, but we don't need to know which
10210 here. If we are unable to support a dominance comparison we return
10211 CC mode. This will then fail to match for the RTL expressions that
10212 generate this call. */
10214 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10216 enum rtx_code cond1, cond2;
10219 /* Currently we will probably get the wrong result if the individual
10220 comparisons are not simple. This also ensures that it is safe to
10221 reverse a comparison if necessary. */
10222 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10224 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10228 /* The if_then_else variant of this tests the second condition if the
10229 first passes, but is true if the first fails. Reverse the first
10230 condition to get a true "inclusive-or" expression. */
10231 if (cond_or == DOM_CC_NX_OR_Y)
10232 cond1 = reverse_condition (cond1);
10234 /* If the comparisons are not equal, and one doesn't dominate the other,
10235 then we can't do this. */
10237 && !comparison_dominates_p (cond1, cond2)
10238 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10243 enum rtx_code temp = cond1;
10251 if (cond_or == DOM_CC_X_AND_Y)
10256 case EQ: return CC_DEQmode;
10257 case LE: return CC_DLEmode;
10258 case LEU: return CC_DLEUmode;
10259 case GE: return CC_DGEmode;
10260 case GEU: return CC_DGEUmode;
10261 default: gcc_unreachable ();
10265 if (cond_or == DOM_CC_X_AND_Y)
10277 gcc_unreachable ();
10281 if (cond_or == DOM_CC_X_AND_Y)
10293 gcc_unreachable ();
10297 if (cond_or == DOM_CC_X_AND_Y)
10298 return CC_DLTUmode;
10303 return CC_DLTUmode;
10305 return CC_DLEUmode;
10309 gcc_unreachable ();
10313 if (cond_or == DOM_CC_X_AND_Y)
10314 return CC_DGTUmode;
10319 return CC_DGTUmode;
10321 return CC_DGEUmode;
10325 gcc_unreachable ();
10328 /* The remaining cases only occur when both comparisons are the
10331 gcc_assert (cond1 == cond2);
10335 gcc_assert (cond1 == cond2);
10339 gcc_assert (cond1 == cond2);
10343 gcc_assert (cond1 == cond2);
10344 return CC_DLEUmode;
10347 gcc_assert (cond1 == cond2);
10348 return CC_DGEUmode;
10351 gcc_unreachable ();
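/* For example, for "if (x == 0 || y == 0)" combine can present
   (ior (eq x 0) (eq y 0)); the DOM_CC_X_OR_Y mapping above yields
   CC_DEQmode, which later allows a conditional-compare sequence
   along the lines of

     cmp   rx, #0
     cmpne ry, #0
     beq   ...

   instead of materializing both comparison results separately.  */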
10356 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10358 /* All floating point compares return CCFP if it is an equality
10359 comparison, and CCFPE otherwise. */
10360 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10380 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10385 gcc_unreachable ();
10389 /* A compare with a shifted operand. Because of canonicalization, the
10390 comparison will have to be swapped when we emit the assembler. */
10391 if (GET_MODE (y) == SImode
10392 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10393 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10394 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10395 || GET_CODE (x) == ROTATERT))
10398 /* This operation is performed swapped, but since we only rely on the Z
10399 flag we don't need an additional mode. */
10400 if (GET_MODE (y) == SImode
10401 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10402 && GET_CODE (x) == NEG
10403 && (op == EQ || op == NE))
10406 /* This is a special case that is used by combine to allow a
10407 comparison of a shifted byte load to be split into a zero-extend
10408 followed by a comparison of the shifted integer (only valid for
10409 equalities and unsigned inequalities). */
10410 if (GET_MODE (x) == SImode
10411 && GET_CODE (x) == ASHIFT
10412 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10413 && GET_CODE (XEXP (x, 0)) == SUBREG
10414 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10415 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10416 && (op == EQ || op == NE
10417 || op == GEU || op == GTU || op == LTU || op == LEU)
10418 && GET_CODE (y) == CONST_INT)
10421 /* A construct for a conditional compare, if the false arm contains
10422 0, then both conditions must be true, otherwise either condition
10423 must be true. Not all conditions are possible, so CCmode is
10424 returned if it can't be done. */
10425 if (GET_CODE (x) == IF_THEN_ELSE
10426 && (XEXP (x, 2) == const0_rtx
10427 || XEXP (x, 2) == const1_rtx)
10428 && COMPARISON_P (XEXP (x, 0))
10429 && COMPARISON_P (XEXP (x, 1)))
10430 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10431 INTVAL (XEXP (x, 2)));
10433 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10434 if (GET_CODE (x) == AND
10435 && COMPARISON_P (XEXP (x, 0))
10436 && COMPARISON_P (XEXP (x, 1)))
10437 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10440 if (GET_CODE (x) == IOR
10441 && COMPARISON_P (XEXP (x, 0))
10442 && COMPARISON_P (XEXP (x, 1)))
10443 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10446 /* An operation (on Thumb) where we want to test for a single bit.
10447 This is done by shifting that bit up into the top bit of a
10448 scratch register; we can then branch on the sign bit. */
10450 && GET_MODE (x) == SImode
10451 && (op == EQ || op == NE)
10452 && GET_CODE (x) == ZERO_EXTRACT
10453 && XEXP (x, 1) == const1_rtx)
10456 /* An operation that sets the condition codes as a side-effect, the
10457 V flag is not set correctly, so we can only use comparisons where
10458 this doesn't matter.  (For LT and GE we can use "mi" and "pl" conditions.)  */
10460 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10461 if (GET_MODE (x) == SImode
10463 && (op == EQ || op == NE || op == LT || op == GE)
10464 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10465 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10466 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10467 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10468 || GET_CODE (x) == LSHIFTRT
10469 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10470 || GET_CODE (x) == ROTATERT
10471 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10472 return CC_NOOVmode;
10474 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10477 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10478 && GET_CODE (x) == PLUS
10479 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10482 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10484 /* To keep things simple, always use the Cirrus cfcmp64 if it is available.  */
10486 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10493 /* A DImode comparison against zero can be implemented by
10494 or'ing the two halves together. */
10495 if (y == const0_rtx)
10498 /* We can do an equality test in three Thumb instructions. */
10508 /* DImode unsigned comparisons can be implemented by cmp +
10509 cmpeq without a scratch register. Not worth doing in
10520 /* DImode signed and unsigned comparisons can be implemented
10521 by cmp + sbcs with a scratch register, but that does not
10522 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10523 gcc_assert (op != EQ && op != NE);
10527 gcc_unreachable ();
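/* To make the DImode cases above concrete (schematically):

     orrs  tmp, x_lo, x_hi          @ x == 0, CC_Zmode, scratch needed

     cmp   x_hi, y_hi
     cmpeq x_lo, y_lo               @ unsigned compare, CC_CZmode

   while the signed (and remaining unsigned) comparisons use cmp on
   the low parts and sbcs on the high parts; sbcs does not produce a
   usable Z flag, which is why GT/LE/GTU/LEU must be reversed.  */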
10534 /* X and Y are two things to compare using CODE. Emit the compare insn and
10535 return the rtx for register 0 in the proper mode. FP means this is a
10536 floating point compare: I don't think that it is needed on the arm. */
10538 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10540 enum machine_mode mode;
10542 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10544 /* We might have X as a constant, Y as a register because of the predicates
10545 used for cmpdi. If so, force X to a register here. */
10546 if (dimode_comparison && !REG_P (x))
10547 x = force_reg (DImode, x);
10549 mode = SELECT_CC_MODE (code, x, y);
10550 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10552 if (dimode_comparison
10553 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10554 && mode != CC_CZmode)
10558 /* To compare two non-zero values for equality, XOR them and
10559 then compare against zero. Not used for ARM mode; there
10560 CC_CZmode is cheaper. */
10561 if (mode == CC_Zmode && y != const0_rtx)
10563 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10566 /* A scratch register is required. */
10567 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10568 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10569 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10572 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10577 /* Generate a sequence of insns that will generate the correct return
10578 address mask depending on the physical architecture that the program
10581 arm_gen_return_addr_mask (void)
10583 rtx reg = gen_reg_rtx (Pmode);
10585 emit_insn (gen_return_addr_mask (reg));
10590 arm_reload_in_hi (rtx *operands)
10592 rtx ref = operands[1];
10594 HOST_WIDE_INT offset = 0;
10596 if (GET_CODE (ref) == SUBREG)
10598 offset = SUBREG_BYTE (ref);
10599 ref = SUBREG_REG (ref);
10602 if (GET_CODE (ref) == REG)
10604 /* We have a pseudo which has been spilt onto the stack; there
10605 are two cases here: the first where there is a simple
10606 stack-slot replacement and a second where the stack-slot is
10607 out of range, or is used as a subreg. */
10608 if (reg_equiv_mem[REGNO (ref)])
10610 ref = reg_equiv_mem[REGNO (ref)];
10611 base = find_replacement (&XEXP (ref, 0));
10614 /* The slot is out of range, or was dressed up in a SUBREG. */
10615 base = reg_equiv_address[REGNO (ref)];
10618 base = find_replacement (&XEXP (ref, 0));
10620 /* Handle the case where the address is too complex to be offset by 1. */
10621 if (GET_CODE (base) == MINUS
10622 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10624 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10626 emit_set_insn (base_plus, base);
10629 else if (GET_CODE (base) == PLUS)
10631 /* The addend must be CONST_INT, or we would have dealt with it above. */
10632 HOST_WIDE_INT hi, lo;
10634 offset += INTVAL (XEXP (base, 1));
10635 base = XEXP (base, 0);
10637 /* Rework the address into a legal sequence of insns. */
10638 /* Valid range for lo is -4095 -> 4095 */
10641 : -((-offset) & 0xfff));
10643 /* Corner case, if lo is the max offset then we would be out of range
10644 once we have added the additional 1 below, so bump the msb into the
10645 pre-loading insn(s). */
10649 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10650 ^ (HOST_WIDE_INT) 0x80000000)
10651 - (HOST_WIDE_INT) 0x80000000);
10653 gcc_assert (hi + lo == offset);
10657 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10659 /* Get the base address; addsi3 knows how to handle constants
10660 that require more than one insn. */
10661 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10667 /* Operands[2] may overlap operands[0] (though it won't overlap
10668 operands[1]), that's why we asked for a DImode reg -- so we can
10669 use the bit that does not overlap. */
10670 if (REGNO (operands[2]) == REGNO (operands[0]))
10671 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10673 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10675 emit_insn (gen_zero_extendqisi2 (scratch,
10676 gen_rtx_MEM (QImode,
10677 plus_constant (base,
10679 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10680 gen_rtx_MEM (QImode,
10681 plus_constant (base,
10683 if (!BYTES_BIG_ENDIAN)
10684 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10685 gen_rtx_IOR (SImode,
10688 gen_rtx_SUBREG (SImode, operands[0], 0),
10692 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10693 gen_rtx_IOR (SImode,
10694 gen_rtx_ASHIFT (SImode, scratch,
10696 gen_rtx_SUBREG (SImode, operands[0], 0)));
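/* The net effect for a little-endian target is, schematically,

     ldrb scratch, [base, #offset]      @ low byte
     ldrb rd, [base, #offset + 1]       @ high byte
     orr  rd, scratch, rd, lsl #8

   with the roles of the two loaded bytes exchanged when
   BYTES_BIG_ENDIAN.  */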
10699 /* Handle storing a half-word to memory during reload by synthesizing as two
10700 byte stores. Take care not to clobber the input values until after we
10701 have moved them somewhere safe. This code assumes that if the DImode
10702 scratch in operands[2] overlaps either the input value or output address
10703 in some way, then that value must die in this insn (we absolutely need
10704 two scratch registers for some corner cases). */
10706 arm_reload_out_hi (rtx *operands)
10708 rtx ref = operands[0];
10709 rtx outval = operands[1];
10711 HOST_WIDE_INT offset = 0;
10713 if (GET_CODE (ref) == SUBREG)
10715 offset = SUBREG_BYTE (ref);
10716 ref = SUBREG_REG (ref);
10719 if (GET_CODE (ref) == REG)
10721 /* We have a pseudo which has been spilt onto the stack; there
10722 are two cases here: the first where there is a simple
10723 stack-slot replacement and a second where the stack-slot is
10724 out of range, or is used as a subreg. */
10725 if (reg_equiv_mem[REGNO (ref)])
10727 ref = reg_equiv_mem[REGNO (ref)];
10728 base = find_replacement (&XEXP (ref, 0));
10731 /* The slot is out of range, or was dressed up in a SUBREG. */
10732 base = reg_equiv_address[REGNO (ref)];
10735 base = find_replacement (&XEXP (ref, 0));
10737 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10739 /* Handle the case where the address is too complex to be offset by 1. */
10740 if (GET_CODE (base) == MINUS
10741 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10743 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10745 /* Be careful not to destroy OUTVAL. */
10746 if (reg_overlap_mentioned_p (base_plus, outval))
10748 /* Updating base_plus might destroy outval, see if we can
10749 swap the scratch and base_plus. */
10750 if (!reg_overlap_mentioned_p (scratch, outval))
10753 scratch = base_plus;
10758 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10760 /* Be conservative and copy OUTVAL into the scratch now,
10761 this should only be necessary if outval is a subreg
10762 of something larger than a word. */
10763 /* XXX Might this clobber base? I can't see how it can,
10764 since scratch is known to overlap with OUTVAL, and
10765 must be wider than a word. */
10766 emit_insn (gen_movhi (scratch_hi, outval));
10767 outval = scratch_hi;
10771 emit_set_insn (base_plus, base);
10774 else if (GET_CODE (base) == PLUS)
10776 /* The addend must be CONST_INT, or we would have dealt with it above. */
10777 HOST_WIDE_INT hi, lo;
10779 offset += INTVAL (XEXP (base, 1));
10780 base = XEXP (base, 0);
10782 /* Rework the address into a legal sequence of insns. */
10783 /* Valid range for lo is -4095 -> 4095 */
10786 : -((-offset) & 0xfff));
10788 /* Corner case, if lo is the max offset then we would be out of range
10789 once we have added the additional 1 below, so bump the msb into the
10790 pre-loading insn(s). */
10794 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10795 ^ (HOST_WIDE_INT) 0x80000000)
10796 - (HOST_WIDE_INT) 0x80000000);
10798 gcc_assert (hi + lo == offset);
10802 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10804 /* Be careful not to destroy OUTVAL. */
10805 if (reg_overlap_mentioned_p (base_plus, outval))
10807 /* Updating base_plus might destroy outval, see if we
10808 can swap the scratch and base_plus. */
10809 if (!reg_overlap_mentioned_p (scratch, outval))
10812 scratch = base_plus;
10817 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10819 /* Be conservative and copy outval into scratch now,
10820 this should only be necessary if outval is a
10821 subreg of something larger than a word. */
10822 /* XXX Might this clobber base? I can't see how it
10823 can, since scratch is known to overlap with
10825 emit_insn (gen_movhi (scratch_hi, outval));
10826 outval = scratch_hi;
10830 /* Get the base address; addsi3 knows how to handle constants
10831 that require more than one insn. */
10832 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10838 if (BYTES_BIG_ENDIAN)
10840 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10841 plus_constant (base, offset + 1)),
10842 gen_lowpart (QImode, outval)));
10843 emit_insn (gen_lshrsi3 (scratch,
10844 gen_rtx_SUBREG (SImode, outval, 0),
10846 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10847 gen_lowpart (QImode, scratch)));
10851 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10852 gen_lowpart (QImode, outval)));
10853 emit_insn (gen_lshrsi3 (scratch,
10854 gen_rtx_SUBREG (SImode, outval, 0),
10856 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10857 plus_constant (base, offset + 1)),
10858 gen_lowpart (QImode, scratch)));
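/* So for a little-endian target the store is synthesized, roughly, as

     strb outval, [base, #offset]       @ low byte
     lsr  scratch, outval, #8
     strb scratch, [base, #offset + 1]  @ high byte

   with the two byte offsets swapped in the big-endian branch above.  */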
10862 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10863 (padded to the size of a word) should be passed in a register. */
10866 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10868 if (TARGET_AAPCS_BASED)
10869 return must_pass_in_stack_var_size (mode, type);
10871 return must_pass_in_stack_var_size_or_pad (mode, type);
10875 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10876 Return true if an argument passed on the stack should be padded upwards,
10877 i.e. if the least-significant byte has useful data.
10878 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10879 aggregate types are placed in the lowest memory address. */
10882 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10884 if (!TARGET_AAPCS_BASED)
10885 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10887 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10894 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10895 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10896 byte of the register has useful data, and return the opposite if the
10897 most significant byte does.
10898 For AAPCS, small aggregates and small complex types are always padded upwards.  */
10902 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10903 tree type, int first ATTRIBUTE_UNUSED)
10905 if (TARGET_AAPCS_BASED
10906 && BYTES_BIG_ENDIAN
10907 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10908 && int_size_in_bytes (type) <= 4)
10911 /* Otherwise, use default padding. */
10912 return !BYTES_BIG_ENDIAN;
10916 /* Print a symbolic form of X to the debug file, F. */
10918 arm_print_value (FILE *f, rtx x)
10920 switch (GET_CODE (x))
10923 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10927 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10935 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10937 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10938 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10946 fprintf (f, "\"%s\"", XSTR (x, 0));
10950 fprintf (f, "`%s'", XSTR (x, 0));
10954 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10958 arm_print_value (f, XEXP (x, 0));
10962 arm_print_value (f, XEXP (x, 0));
10964 arm_print_value (f, XEXP (x, 1));
10972 fprintf (f, "????");
10977 /* Routines for manipulation of the constant pool. */
10979 /* Arm instructions cannot load a large constant directly into a
10980 register; they have to come from a pc relative load. The constant
10981 must therefore be placed in the addressable range of the pc
10982 relative load. Depending on the precise pc relative load
10983 instruction the range is somewhere between 256 bytes and 4k. This
10984 means that we often have to dump a constant inside a function, and
10985 generate code to branch around it.
10987 It is important to minimize this, since the branches will slow
10988 things down and make the code larger.
10990 Normally we can hide the table after an existing unconditional
10991 branch so that there is no interruption of the flow, but in the
10992 worst case the code looks like this:
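	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long	value
	L2:
	...

   (and so on, with an extra branch, alignment padding and a dumped
   constant for every pool that cannot be hidden behind an existing
   unconditional branch).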
11010 We fix this by performing a scan after scheduling, which notices
11011 which instructions need to have their operands fetched from the
11012 constant table and builds the table.
11014 The algorithm starts by building a table of all the constants that
11015 need fixing up and all the natural barriers in the function (places
11016 where a constant table can be dropped without breaking the flow).
11017 For each fixup we note how far the pc-relative replacement will be
11018 able to reach and the offset of the instruction into the function.
11020 Having built the table we then group the fixes together to form
11021 tables that are as large as possible (subject to addressing
11022 constraints) and emit each table of constants after the last
11023 barrier that is within range of all the instructions in the group.
11024 If a group does not contain a barrier, then we forcibly create one
11025 by inserting a jump instruction into the flow. Once the table has
11026 been inserted, the insns are then modified to reference the
11027 relevant entry in the pool.
11029 Possible enhancements to the algorithm (not implemented) are:
11031 1) For some processors and object formats, there may be benefit in
11032 aligning the pools to the start of cache lines; this alignment
11033 would need to be taken into account when calculating addressability of a label.  */
11036 /* These typedefs are located at the start of this file, so that
11037 they can be used in the prototypes there. This comment is to
11038 remind readers of that fact so that the following structures
11039 can be understood more easily.
11041 typedef struct minipool_node Mnode;
11042 typedef struct minipool_fixup Mfix; */
11044 struct minipool_node
11046 /* Doubly linked chain of entries. */
11049 /* The maximum offset into the code that this entry can be placed. While
11050 pushing fixes for forward references, all entries are sorted in order
11051 of increasing max_address. */
11052 HOST_WIDE_INT max_address;
11053 /* Similarly for an entry inserted for a backwards ref. */
11054 HOST_WIDE_INT min_address;
11055 /* The number of fixes referencing this entry. This can become zero
11056 if we "unpush" an entry. In this case we ignore the entry when we
11057 come to emit the code. */
11059 /* The offset from the start of the minipool. */
11060 HOST_WIDE_INT offset;
11061 /* The value in table. */
11063 /* The mode of value. */
11064 enum machine_mode mode;
11065 /* The size of the value. With iWMMXt enabled
11066 sizes > 4 also imply an alignment of 8 bytes.  */
11070 struct minipool_fixup
11074 HOST_WIDE_INT address;
11076 enum machine_mode mode;
11080 HOST_WIDE_INT forwards;
11081 HOST_WIDE_INT backwards;
11084 /* Fixes less than a word need padding out to a word boundary. */
11085 #define MINIPOOL_FIX_SIZE(mode) \
11086 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
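/* For example, an HImode fix (GET_MODE_SIZE == 2) still occupies 4
   bytes in the pool, while a DImode fix occupies 8.  */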
11088 static Mnode * minipool_vector_head;
11089 static Mnode * minipool_vector_tail;
11090 static rtx minipool_vector_label;
11091 static int minipool_pad;
11093 /* The linked list of all minipool fixes required for this function. */
11094 Mfix * minipool_fix_head;
11095 Mfix * minipool_fix_tail;
11096 /* The fix entry for the current minipool, once it has been placed. */
11097 Mfix * minipool_barrier;
11099 /* Determines if INSN is the start of a jump table. Returns the end
11100 of the TABLE or NULL_RTX. */
11102 is_jump_table (rtx insn)
11106 if (GET_CODE (insn) == JUMP_INSN
11107 && JUMP_LABEL (insn) != NULL
11108 && ((table = next_real_insn (JUMP_LABEL (insn)))
11109 == next_real_insn (insn))
11111 && GET_CODE (table) == JUMP_INSN
11112 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11113 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11119 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11120 #define JUMP_TABLES_IN_TEXT_SECTION 0
11123 static HOST_WIDE_INT
11124 get_jump_table_size (rtx insn)
11126 /* ADDR_VECs only take room if read-only data goes into the text
11128 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11130 rtx body = PATTERN (insn);
11131 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11132 HOST_WIDE_INT size;
11133 HOST_WIDE_INT modesize;
modesize = GET_MODE_SIZE (GET_MODE (body));
size = modesize * XVECLEN (body, elt);
switch (modesize)
  {
  case 1: /* Round up size of TBB table to a halfword boundary.  */
    size = (size + 1) & ~(HOST_WIDE_INT)1;
    break;
  case 2: /* No padding necessary for TBH.  */
    break;
  case 4: /* Add two bytes for alignment on Thumb.  */
    if (TARGET_THUMB)
      size += 2;
    break;
  default:
    gcc_unreachable ();
  }
11160 /* Move a minipool fix MP from its current location to before MAX_MP.
11161 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11162 constraints may need updating. */
11164 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11165 HOST_WIDE_INT max_address)
11167 /* The code below assumes these are different. */
11168 gcc_assert (mp != max_mp);
11170 if (max_mp == NULL)
11172 if (max_address < mp->max_address)
11173 mp->max_address = max_address;
11177 if (max_address > max_mp->max_address - mp->fix_size)
11178 mp->max_address = max_mp->max_address - mp->fix_size;
11180 mp->max_address = max_address;
11182 /* Unlink MP from its current position. Since max_mp is non-null,
11183 mp->prev must be non-null. */
11184 mp->prev->next = mp->next;
11185 if (mp->next != NULL)
11186 mp->next->prev = mp->prev;
11188 minipool_vector_tail = mp->prev;
11190 /* Re-insert it before MAX_MP. */
11192 mp->prev = max_mp->prev;
11195 if (mp->prev != NULL)
11196 mp->prev->next = mp;
11198 minipool_vector_head = mp;
11201 /* Save the new entry. */
11204 /* Scan over the preceding entries and adjust their addresses as
11206 while (mp->prev != NULL
11207 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11209 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11216 /* Add a constant to the minipool for a forward reference. Returns the
11217 node added or NULL if the constant will not fit in this pool. */
11219 add_minipool_forward_ref (Mfix *fix)
11221 /* If set, max_mp is the first pool_entry that has a lower
11222 constraint than the one we are trying to add. */
11223 Mnode * max_mp = NULL;
11224 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11227 /* If the minipool starts before the end of FIX->INSN then this FIX
11228 cannot be placed into the current pool. Furthermore, adding the
11229 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
11231 if (minipool_vector_head
11232 && (fix->address + get_attr_length (fix->insn)
11233 >= minipool_vector_head->max_address - fix->fix_size))
11236 /* Scan the pool to see if a constant with the same value has
11237 already been added. While we are doing this, also note the
11238 location where we must insert the constant if it doesn't already
11240 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11242 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11243 && fix->mode == mp->mode
11244 && (GET_CODE (fix->value) != CODE_LABEL
11245 || (CODE_LABEL_NUMBER (fix->value)
11246 == CODE_LABEL_NUMBER (mp->value)))
11247 && rtx_equal_p (fix->value, mp->value))
11249 /* More than one fix references this entry. */
11251 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11254 /* Note the insertion point if necessary. */
11256 && mp->max_address > max_address)
11259 /* If we are inserting an 8-byte-aligned quantity and
11260 we have not already found an insertion point, then
11261 make sure that all such 8-byte-aligned quantities are
11262 placed at the start of the pool. */
11263 if (ARM_DOUBLEWORD_ALIGN
11265 && fix->fix_size >= 8
11266 && mp->fix_size < 8)
11269 max_address = mp->max_address;
11273 /* The value is not currently in the minipool, so we need to create
11274 a new entry for it. If MAX_MP is NULL, the entry will be put on
11275 the end of the list since the placement is less constrained than
11276 any existing entry. Otherwise, we insert the new fix before
11277 MAX_MP and, if necessary, adjust the constraints on the other
11280 mp->fix_size = fix->fix_size;
11281 mp->mode = fix->mode;
11282 mp->value = fix->value;
11284 /* Not yet required for a backwards ref. */
11285 mp->min_address = -65536;
11287 if (max_mp == NULL)
11289 mp->max_address = max_address;
11291 mp->prev = minipool_vector_tail;
11293 if (mp->prev == NULL)
11295 minipool_vector_head = mp;
11296 minipool_vector_label = gen_label_rtx ();
11299 mp->prev->next = mp;
11301 minipool_vector_tail = mp;
11305 if (max_address > max_mp->max_address - mp->fix_size)
11306 mp->max_address = max_mp->max_address - mp->fix_size;
11308 mp->max_address = max_address;
11311 mp->prev = max_mp->prev;
11313 if (mp->prev != NULL)
11314 mp->prev->next = mp;
11316 minipool_vector_head = mp;
11319 /* Save the new entry. */
11322 /* Scan over the preceding entries and adjust their addresses as
11324 while (mp->prev != NULL
11325 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11327 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11335 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11336 HOST_WIDE_INT min_address)
11338 HOST_WIDE_INT offset;
11340 /* The code below assumes these are different. */
11341 gcc_assert (mp != min_mp);
11343 if (min_mp == NULL)
11345 if (min_address > mp->min_address)
11346 mp->min_address = min_address;
11350 /* We will adjust this below if it is too loose. */
11351 mp->min_address = min_address;
11353 /* Unlink MP from its current position. Since min_mp is non-null,
11354 mp->next must be non-null. */
11355 mp->next->prev = mp->prev;
11356 if (mp->prev != NULL)
11357 mp->prev->next = mp->next;
11359 minipool_vector_head = mp->next;
11361 /* Reinsert it after MIN_MP. */
11363 mp->next = min_mp->next;
11365 if (mp->next != NULL)
11366 mp->next->prev = mp;
11368 minipool_vector_tail = mp;
11374 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11376 mp->offset = offset;
11377 if (mp->refcount > 0)
11378 offset += mp->fix_size;
11380 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11381 mp->next->min_address = mp->min_address + mp->fix_size;
11387 /* Add a constant to the minipool for a backward reference. Returns the
11388 node added or NULL if the constant will not fit in this pool.
11390 Note that the code for insertion for a backwards reference can be
11391 somewhat confusing because the calculated offsets for each fix do
11392 not take into account the size of the pool (which is still under construction). */
11395 add_minipool_backward_ref (Mfix *fix)
11397 /* If set, min_mp is the last pool_entry that has a lower constraint
11398 than the one we are trying to add. */
11399 Mnode *min_mp = NULL;
11400 /* This can be negative, since it is only a constraint. */
11401 HOST_WIDE_INT min_address = fix->address - fix->backwards;
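/* For example, a fix at address 12 whose insn has a neg_pool_range of
   4096 gets min_address = 12 - 4096 = -4084: a negative bound simply
   means the backward constraint cannot yet be violated.  */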
11404 /* If we can't reach the current pool from this insn, or if we can't
11405 insert this entry at the end of the pool without pushing other
11406 fixes out of range, then we don't try. This ensures that we
11407 can't fail later on. */
11408 if (min_address >= minipool_barrier->address
11409 || (minipool_vector_tail->min_address + fix->fix_size
11410 >= minipool_barrier->address))
11413 /* Scan the pool to see if a constant with the same value has
11414 already been added. While we are doing this, also note the
11415 location where we must insert the constant if it doesn't already
11417 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11419 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11420 && fix->mode == mp->mode
11421 && (GET_CODE (fix->value) != CODE_LABEL
11422 || (CODE_LABEL_NUMBER (fix->value)
11423 == CODE_LABEL_NUMBER (mp->value)))
11424 && rtx_equal_p (fix->value, mp->value)
11425 /* Check that there is enough slack to move this entry to the
11426 end of the table (this is conservative). */
11427 && (mp->max_address
11428 > (minipool_barrier->address
11429 + minipool_vector_tail->offset
11430 + minipool_vector_tail->fix_size)))
11433 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11436 if (min_mp != NULL)
11437 mp->min_address += fix->fix_size;
11440 /* Note the insertion point if necessary. */
11441 if (mp->min_address < min_address)
11443 /* For now, we do not allow the insertion of nodes requiring
11444 8-byte alignment anywhere but at the start of the pool. */
11445 if (ARM_DOUBLEWORD_ALIGN
11446 && fix->fix_size >= 8 && mp->fix_size < 8)
11451 else if (mp->max_address
11452 < minipool_barrier->address + mp->offset + fix->fix_size)
11454 /* Inserting before this entry would push the fix beyond
11455 its maximum address (which can happen if we have
11456 re-located a forwards fix); force the new fix to come after it. */
11458 if (ARM_DOUBLEWORD_ALIGN
11459 && fix->fix_size >= 8 && mp->fix_size < 8)
11464 min_address = mp->min_address + fix->fix_size;
11467 /* Do not insert a non-8-byte aligned quantity before 8-byte
11468 aligned quantities. */
11469 else if (ARM_DOUBLEWORD_ALIGN
11470 && fix->fix_size < 8
11471 && mp->fix_size >= 8)
11474 min_address = mp->min_address + fix->fix_size;
11479 /* We need to create a new entry. */
11481 mp->fix_size = fix->fix_size;
11482 mp->mode = fix->mode;
11483 mp->value = fix->value;
11485 mp->max_address = minipool_barrier->address + 65536;
11487 mp->min_address = min_address;
11489 if (min_mp == NULL)
11492 mp->next = minipool_vector_head;
11494 if (mp->next == NULL)
11496 minipool_vector_tail = mp;
11497 minipool_vector_label = gen_label_rtx ();
11500 mp->next->prev = mp;
11502 minipool_vector_head = mp;
11506 mp->next = min_mp->next;
11510 if (mp->next != NULL)
11511 mp->next->prev = mp;
11513 minipool_vector_tail = mp;
11516 /* Save the new entry. */
11524 /* Scan over the following entries and adjust their offsets. */
11525 while (mp->next != NULL)
11527 if (mp->next->min_address < mp->min_address + mp->fix_size)
11528 mp->next->min_address = mp->min_address + mp->fix_size;
11531 mp->next->offset = mp->offset + mp->fix_size;
11533 mp->next->offset = mp->offset;
11542 assign_minipool_offsets (Mfix *barrier)
11544 HOST_WIDE_INT offset = 0;
11547 minipool_barrier = barrier;
11549 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11551 mp->offset = offset;
11553 if (mp->refcount > 0)
11554 offset += mp->fix_size;
11558 /* Output the literal table */
11560 dump_minipool (rtx scan)
11566 if (ARM_DOUBLEWORD_ALIGN)
11567 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11568 if (mp->refcount > 0 && mp->fix_size >= 8)
11575 fprintf (dump_file,
11576 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11577 INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
11579 scan = emit_label_after (gen_label_rtx (), scan);
11580 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11581 scan = emit_label_after (minipool_vector_label, scan);
11583 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11585 if (mp->refcount > 0)
11589 fprintf (dump_file,
11590 ";; Offset %u, min %ld, max %ld ",
11591 (unsigned) mp->offset, (long) mp->min_address,
11592 (long) mp->max_address);
11593 arm_print_value (dump_file, mp->value);
11594 fputc ('\n', dump_file);
11597 switch (mp->fix_size)
11599 #ifdef HAVE_consttable_1
11601 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11605 #ifdef HAVE_consttable_2
11607 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11611 #ifdef HAVE_consttable_4
11613 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11617 #ifdef HAVE_consttable_8
11619 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11623 #ifdef HAVE_consttable_16
11625 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11630 gcc_unreachable ();
11638 minipool_vector_head = minipool_vector_tail = NULL;
11639 scan = emit_insn_after (gen_consttable_end (), scan);
11640 scan = emit_barrier_after (scan);
11643 /* Return the cost of forcibly inserting a barrier after INSN. */
11645 arm_barrier_cost (rtx insn)
11647 /* Basing the location of the pool on the loop depth is preferable,
11648 but at the moment, the basic block information seems to be
11649 corrupted by this stage of the compilation. */
11650 int base_cost = 50;
11651 rtx next = next_nonnote_insn (insn);
11653 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11656 switch (GET_CODE (insn))
11659 /* It will always be better to place the table before the label, rather than after it. */
11668 return base_cost - 10;
11671 return base_cost + 10;
11675 /* Find the best place in the insn stream in the range
11676 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11677 Create the barrier by inserting a jump and add a new fix entry for
11680 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11682 HOST_WIDE_INT count = 0;
11684 rtx from = fix->insn;
11685 /* The instruction after which we will insert the jump. */
11686 rtx selected = NULL;
11688 /* The address at which the jump instruction will be placed. */
11689 HOST_WIDE_INT selected_address;
11691 HOST_WIDE_INT max_count = max_address - fix->address;
11692 rtx label = gen_label_rtx ();
11694 selected_cost = arm_barrier_cost (from);
11695 selected_address = fix->address;
11697 while (from && count < max_count)
11702 /* This code shouldn't have been called if there was a natural barrier within range. */
11704 gcc_assert (GET_CODE (from) != BARRIER);
11706 /* Count the length of this insn. */
11707 count += get_attr_length (from);
11709 /* If there is a jump table, add its length. */
11710 tmp = is_jump_table (from);
11713 count += get_jump_table_size (tmp);
11715 /* Jump tables aren't in a basic block, so base the cost on
11716 the dispatch insn. If we select this location, we will
11717 still put the pool after the table. */
11718 new_cost = arm_barrier_cost (from);
11720 if (count < max_count
11721 && (!selected || new_cost <= selected_cost))
11724 selected_cost = new_cost;
11725 selected_address = fix->address + count;
11728 /* Continue after the dispatch table. */
11729 from = NEXT_INSN (tmp);
11733 new_cost = arm_barrier_cost (from);
11735 if (count < max_count
11736 && (!selected || new_cost <= selected_cost))
11739 selected_cost = new_cost;
11740 selected_address = fix->address + count;
11743 from = NEXT_INSN (from);
11746 /* Make sure that we found a place to insert the jump. */
11747 gcc_assert (selected);
11749 /* Create a new JUMP_INSN that branches around a barrier. */
11750 from = emit_jump_insn_after (gen_jump (label), selected);
11751 JUMP_LABEL (from) = label;
11752 barrier = emit_barrier_after (from);
11753 emit_label_after (label, barrier);
11755 /* Create a minipool barrier entry for the new barrier. */
11756 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11757 new_fix->insn = barrier;
11758 new_fix->address = selected_address;
11759 new_fix->next = fix->next;
11760 fix->next = new_fix;
11765 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
11768 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11770 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11773 fix->address = address;
11776 if (minipool_fix_head != NULL)
11777 minipool_fix_tail->next = fix;
11779 minipool_fix_head = fix;
11781 minipool_fix_tail = fix;
11784 /* Record INSN, which will need fixing up to load a value from the
11785 minipool. ADDRESS is the offset of the insn since the start of the
11786 function; LOC is a pointer to the part of the insn which requires
11787 fixing; VALUE is the constant that must be loaded, which is of mode MODE. */
11790 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11791 enum machine_mode mode, rtx value)
11793 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11796 fix->address = address;
11799 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11800 fix->value = value;
11801 fix->forwards = get_attr_pool_range (insn);
11802 fix->backwards = get_attr_neg_pool_range (insn);
11803 fix->minipool = NULL;
11805 /* If an insn doesn't have a range defined for it, then it isn't
11806 expecting to be reworked by this code. Better to stop now than
11807 to generate duff assembly code. */
11808 gcc_assert (fix->forwards || fix->backwards);
11810 /* If an entry requires 8-byte alignment then assume all constant pools
11811 require 4 bytes of padding. Trying to do this later on a per-pool
11812 basis is awkward because existing pool entries have to be modified. */
11813 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11818 fprintf (dump_file,
11819 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11820 GET_MODE_NAME (mode),
11821 INSN_UID (insn), (unsigned long) address,
11822 -1 * (long)fix->backwards, (long)fix->forwards);
11823 arm_print_value (dump_file, fix->value);
11824 fprintf (dump_file, "\n");
11827 /* Add it to the chain of fixes. */
11830 if (minipool_fix_head != NULL)
11831 minipool_fix_tail->next = fix;
11833 minipool_fix_head = fix;
11835 minipool_fix_tail = fix;
11838 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11839 Returns the number of insns needed, or 99 if we don't know how to generate it. */
11842 arm_const_double_inline_cost (rtx val)
11844 rtx lowpart, highpart;
11845 enum machine_mode mode;
11847 mode = GET_MODE (val);
11849 if (mode == VOIDmode)
11852 gcc_assert (GET_MODE_SIZE (mode) == 8);
11854 lowpart = gen_lowpart (SImode, val);
11855 highpart = gen_highpart_mode (SImode, mode, val);
11857 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11858 gcc_assert (GET_CODE (highpart) == CONST_INT);
11860 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11861 NULL_RTX, NULL_RTX, 0, 0)
11862 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11863 NULL_RTX, NULL_RTX, 0, 0));
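/* For illustration: for VAL = 0x000000ff000000ff each 32-bit half is a
   valid ARM immediate, so both arm_gen_constant calls above return 1 and
   the total inline cost is 2 insns; halves needing several rotated 8-bit
   chunks push the cost up until a minipool load wins instead.  */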
11866 /* Return true if it is worthwhile to split a 64-bit constant into two
11867 32-bit operations. This is the case if optimizing for size, or
11868 if we have load delay slots, or if one 32-bit part can be done with
11869 a single data operation. */
11871 arm_const_double_by_parts (rtx val)
11873 enum machine_mode mode = GET_MODE (val);
11876 if (optimize_size || arm_ld_sched)
11879 if (mode == VOIDmode)
11882 part = gen_highpart_mode (SImode, mode, val);
11884 gcc_assert (GET_CODE (part) == CONST_INT);
11886 if (const_ok_for_arm (INTVAL (part))
11887 || const_ok_for_arm (~INTVAL (part)))
11890 part = gen_lowpart (SImode, val);
11892 gcc_assert (GET_CODE (part) == CONST_INT);
11894 if (const_ok_for_arm (INTVAL (part))
11895 || const_ok_for_arm (~INTVAL (part)))
11901 /* Return true if it is possible to inline both the high and low parts
11902 of a 64-bit constant into 32-bit data processing instructions. */
11904 arm_const_double_by_immediates (rtx val)
11906 enum machine_mode mode = GET_MODE (val);
11909 if (mode == VOIDmode)
11912 part = gen_highpart_mode (SImode, mode, val);
11914 gcc_assert (GET_CODE (part) == CONST_INT);
11916 if (!const_ok_for_arm (INTVAL (part)))
11919 part = gen_lowpart (SImode, val);
11921 gcc_assert (GET_CODE (part) == CONST_INT);
11923 if (!const_ok_for_arm (INTVAL (part)))
11929 /* Scan INSN and note any of its operands that need fixing.
11930 If DO_PUSHES is false we do not actually push any of the fixups
11931 needed. The function returns TRUE if any fixups were needed/pushed.
11932 This is used by arm_memory_load_p() which needs to know about loads
11933 of constants that will be converted into minipool loads. */
11935 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11937 bool result = false;
11940 extract_insn (insn);
11942 if (!constrain_operands (1))
11943 fatal_insn_not_found (insn);
11945 if (recog_data.n_alternatives == 0)
11948 /* Fill in recog_op_alt with information about the constraints of this insn. */
11950 preprocess_constraints ();
11952 for (opno = 0; opno < recog_data.n_operands; opno++)
11954 /* Things we need to fix can only occur in inputs. */
11955 if (recog_data.operand_type[opno] != OP_IN)
11958 /* If this alternative is a memory reference, then any mention
11959 of constants in this alternative is really to fool reload
11960 into allowing us to accept one there. We need to fix them up
11961 now so that we output the right code. */
11962 if (recog_op_alt[opno][which_alternative].memory_ok)
11964 rtx op = recog_data.operand[opno];
11966 if (CONSTANT_P (op))
11969 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11970 recog_data.operand_mode[opno], op);
11973 else if (GET_CODE (op) == MEM
11974 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11975 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11979 rtx cop = avoid_constant_pool_reference (op);
11981 /* Casting the address of something to a mode narrower
11982 than a word can cause avoid_constant_pool_reference()
11983 to return the pool reference itself. That's no good to
11984 us here. Let's just hope that we can use the
11985 constant pool value directly. */
11987 cop = get_pool_constant (XEXP (op, 0));
11989 push_minipool_fix (insn, address,
11990 recog_data.operand_loc[opno],
11991 recog_data.operand_mode[opno], cop);
12002 /* Convert instructions to their cc-clobbering variant if possible, since
12003 that allows us to use smaller encodings. */
12006 thumb2_reorg (void)
12011 INIT_REG_SET (&live);
12013 /* We are freeing block_for_insn in the toplev to keep compatibility
12014 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12015 compute_bb_for_insn ();
12021 COPY_REG_SET (&live, DF_LR_OUT (bb));
12022 df_simulate_initialize_backwards (bb, &live);
12023 FOR_BB_INSNS_REVERSE (bb, insn)
12025 if (NONJUMP_INSN_P (insn)
12026 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12028 rtx pat = PATTERN (insn);
12029 if (GET_CODE (pat) == SET
12030 && low_register_operand (XEXP (pat, 0), SImode)
12031 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12032 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12033 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12035 rtx dst = XEXP (pat, 0);
12036 rtx src = XEXP (pat, 1);
12037 rtx op0 = XEXP (src, 0);
12038 if (rtx_equal_p (dst, op0)
12039 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12041 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12042 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12043 rtvec vec = gen_rtvec (2, pat, clobber);
12044 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12045 INSN_CODE (insn) = -1;
12049 if (NONDEBUG_INSN_P (insn))
12050 df_simulate_one_insn_backwards (bb, insn, &live);
12053 CLEAR_REG_SET (&live);
12056 /* GCC puts the pool in the wrong place for ARM, since we can only
12057 load addresses a limited distance around the pc. We do some
12058 special munging to move the constant pool values to the correct
12059 point in the code. */
12064 HOST_WIDE_INT address = 0;
12070 minipool_fix_head = minipool_fix_tail = NULL;
12072 /* The first insn must always be a note, or the code below won't
12073 scan it properly. */
12074 insn = get_insns ();
12075 gcc_assert (GET_CODE (insn) == NOTE);
12078 /* Scan all the insns and record the operands that will need fixing. */
12079 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12081 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12082 && (arm_cirrus_insn_p (insn)
12083 || GET_CODE (insn) == JUMP_INSN
12084 || arm_memory_load_p (insn)))
12085 cirrus_reorg (insn);
12087 if (GET_CODE (insn) == BARRIER)
12088 push_minipool_barrier (insn, address);
12089 else if (INSN_P (insn))
12093 note_invalid_constants (insn, address, true);
12094 address += get_attr_length (insn);
12096 /* If the insn is a vector jump, add the size of the table
12097 and skip the table. */
12098 if ((table = is_jump_table (insn)) != NULL)
12100 address += get_jump_table_size (table);
12106 fix = minipool_fix_head;
12108 /* Now scan the fixups and perform the required changes. */
12113 Mfix * last_added_fix;
12114 Mfix * last_barrier = NULL;
12117 /* Skip any further barriers before the next fix. */
12118 while (fix && GET_CODE (fix->insn) == BARRIER)
12121 /* No more fixes. */
12125 last_added_fix = NULL;
12127 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12129 if (GET_CODE (ftmp->insn) == BARRIER)
12131 if (ftmp->address >= minipool_vector_head->max_address)
12134 last_barrier = ftmp;
12136 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12139 last_added_fix = ftmp; /* Keep track of the last fix added. */
12142 /* If we found a barrier, drop back to that; any fixes that we
12143 could have reached but come after the barrier will now go in
12144 the next mini-pool. */
12145 if (last_barrier != NULL)
12147 /* Reduce the refcount for those fixes that won't go into this
12149 for (fdel = last_barrier->next;
12150 fdel && fdel != ftmp;
12153 fdel->minipool->refcount--;
12154 fdel->minipool = NULL;
12157 ftmp = last_barrier;
12161 /* ftmp is the first fix that we can't fit into this pool, and
12162 there are no natural barriers that we could use. Insert a
12163 new barrier in the code somewhere between the previous
12164 fix and this one, and arrange to jump around it. */
12165 HOST_WIDE_INT max_address;
12167 /* The last item on the list of fixes must be a barrier, so
12168 we can never run off the end of the list of fixes without
12169 last_barrier being set. */
12172 max_address = minipool_vector_head->max_address;
12173 /* Check that there isn't another fix that is in range that
12174 we couldn't fit into this pool because the pool was
12175 already too large: we need to put the pool before such an
12176 instruction. The pool itself may come just after the
12177 fix because create_fix_barrier also allows space for a
12178 jump instruction. */
12179 if (ftmp->address < max_address)
12180 max_address = ftmp->address + 1;
12182 last_barrier = create_fix_barrier (last_added_fix, max_address);
12185 assign_minipool_offsets (last_barrier);
12189 if (GET_CODE (ftmp->insn) != BARRIER
12190 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12197 /* Scan over the fixes we have identified for this pool, fixing them
12198 up and adding the constants to the pool itself. */
12199 for (this_fix = fix; this_fix && ftmp != this_fix;
12200 this_fix = this_fix->next)
12201 if (GET_CODE (this_fix->insn) != BARRIER)
12204 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12205 minipool_vector_label),
12206 this_fix->minipool->offset);
12207 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12210 dump_minipool (last_barrier->insn);
12214 /* From now on we must synthesize any constants that we can't handle
12215 directly. This can happen if the RTL gets split during final
12216 instruction generation. */
12217 after_arm_reorg = 1;
12219 /* Free the minipool memory. */
12220 obstack_free (&minipool_obstack, minipool_startobj);
12223 /* Routines to output assembly language. */
12225 /* If the rtx matches one of the eight valid floating-point constants,
12226 return the string form of that number. In this way we can ensure that
12227 valid double constants are generated even when cross-compiling. */
12229 fp_immediate_constant (rtx x)
12234 if (!fp_consts_inited)
12237 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12238 for (i = 0; i < 8; i++)
12239 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12240 return strings_fp[i];
12242 gcc_unreachable ();
12245 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12246 static const char *
12247 fp_const_from_val (REAL_VALUE_TYPE *r)
12251 if (!fp_consts_inited)
12254 for (i = 0; i < 8; i++)
12255 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12256 return strings_fp[i];
12258 gcc_unreachable ();
12261 /* Output the operands of a LDM/STM instruction to STREAM.
12262 MASK is the ARM register set mask of which only bits 0-15 are important.
12263 REG is the base register, either the frame pointer or the stack pointer,
12264 INSTR is the possibly suffixed load or store instruction.
12265 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12268 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12269 unsigned long mask, int rfe)
12272 bool not_first = FALSE;
12274 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12275 fputc ('\t', stream);
12276 asm_fprintf (stream, instr, reg);
12277 fputc ('{', stream);
12279 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12280 if (mask & (1 << i))
12283 fprintf (stream, ", ");
12285 asm_fprintf (stream, "%r", i);
12290 fprintf (stream, "}^\n");
12292 fprintf (stream, "}\n");
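/* For example (assuming an INSTR string along the lines of
   "ldmfd\t%r!, "), a call with REG == SP_REGNUM and
   MASK == (1 << 4) | (1 << 5) | (1 << LR_REGNUM) emits
	ldmfd	sp!, {r4, r5, lr}
   with a trailing '^' appended when RFE is nonzero and PC_REGNUM is in
   the mask.  */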
12296 /* Output a FLDMD instruction to STREAM.
12297 BASE is the register containing the address.
12298 REG and COUNT specify the register range.
12299 Extra registers may be added to avoid hardware bugs.
12301 We output FLDMD even for ARMv5 VFP implementations. Although
12302 FLDMD is technically not supported until ARMv6, it is believed
12303 that all VFP implementations support its use in this context. */
12306 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12310 /* Workaround ARM10 VFPr1 bug. */
12311 if (count == 2 && !arm_arch6)
12318 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12319 load into multiple parts if we have to handle more than 16 registers. */
12322 vfp_output_fldmd (stream, base, reg, 16);
12323 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12327 fputc ('\t', stream);
12328 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12330 for (i = reg; i < reg + count; i++)
12333 fputs (", ", stream);
12334 asm_fprintf (stream, "d%d", i);
12336 fputs ("}\n", stream);
12341 /* Output the assembly for a store multiple. */
12344 vfp_output_fstmd (rtx * operands)
12351 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12352 p = strlen (pattern);
12354 gcc_assert (GET_CODE (operands[1]) == REG);
12356 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12357 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12359 p += sprintf (&pattern[p], ", d%d", base + i);
12361 strcpy (&pattern[p], "}");
12363 output_asm_insn (pattern, operands);
12368 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12369 number of bytes pushed. */
12372 vfp_emit_fstmd (int base_reg, int count)
12379 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12380 register pairs are stored by a store multiple insn. We avoid this
12381 by pushing an extra pair. */
12382 if (count == 2 && !arm_arch6)
12384 if (base_reg == LAST_VFP_REGNUM - 3)
12389 /* FSTMD may not store more than 16 doubleword registers at once. Split
12390 larger stores into multiple parts (up to a maximum of two, in practice). */
12395 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
12397 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12398 saved += vfp_emit_fstmd (base_reg, 16);
12402 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12403 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12405 reg = gen_rtx_REG (DFmode, base_reg);
12408 XVECEXP (par, 0, 0)
12409 = gen_rtx_SET (VOIDmode,
12412 gen_rtx_PRE_MODIFY (Pmode,
12415 (stack_pointer_rtx,
12418 gen_rtx_UNSPEC (BLKmode,
12419 gen_rtvec (1, reg),
12420 UNSPEC_PUSH_MULT));
12422 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12423 plus_constant (stack_pointer_rtx, -(count * 8)));
12424 RTX_FRAME_RELATED_P (tmp) = 1;
12425 XVECEXP (dwarf, 0, 0) = tmp;
12427 tmp = gen_rtx_SET (VOIDmode,
12428 gen_frame_mem (DFmode, stack_pointer_rtx),
12430 RTX_FRAME_RELATED_P (tmp) = 1;
12431 XVECEXP (dwarf, 0, 1) = tmp;
12433 for (i = 1; i < count; i++)
12435 reg = gen_rtx_REG (DFmode, base_reg);
12437 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12439 tmp = gen_rtx_SET (VOIDmode,
12440 gen_frame_mem (DFmode,
12441 plus_constant (stack_pointer_rtx,
12444 RTX_FRAME_RELATED_P (tmp) = 1;
12445 XVECEXP (dwarf, 0, i + 1) = tmp;
12448 par = emit_insn (par);
12449 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12450 RTX_FRAME_RELATED_P (par) = 1;
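/* For illustration: vfp_emit_fstmd (FIRST_VFP_REGNUM, 3) stores d0-d2
   below the pre-decremented stack pointer and reports 24 bytes pushed
   (count * 8, per the function comment), while count == 2 on a
   pre-ARMv6 core is widened by an extra register pair by the ARM10
   VFPr1 workaround above.  */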
12455 /* Emit a call instruction with pattern PAT. ADDR is the address of
12456 the call target. */
12459 arm_emit_call_insn (rtx pat, rtx addr)
12463 insn = emit_call_insn (pat);
12465 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12466 If the call might use such an entry, add a use of the PIC register
12467 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12468 if (TARGET_VXWORKS_RTP
12470 && GET_CODE (addr) == SYMBOL_REF
12471 && (SYMBOL_REF_DECL (addr)
12472 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12473 : !SYMBOL_REF_LOCAL_P (addr)))
12475 require_pic_register ();
12476 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12480 /* Output a 'call' insn. */
12482 output_call (rtx *operands)
12484 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12486 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12487 if (REGNO (operands[0]) == LR_REGNUM)
12489 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12490 output_asm_insn ("mov%?\t%0, %|lr", operands);
12493 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12495 if (TARGET_INTERWORK || arm_arch4t)
12496 output_asm_insn ("bx%?\t%0", operands);
12498 output_asm_insn ("mov%?\t%|pc, %0", operands);
12503 /* Output a 'call' insn that is a reference in memory. This is
12504 disabled for ARMv5 and we prefer a blx instead because otherwise
12505 there's a significant performance overhead. */
12507 output_call_mem (rtx *operands)
12509 gcc_assert (!arm_arch5);
12510 if (TARGET_INTERWORK)
12512 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12513 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12514 output_asm_insn ("bx%?\t%|ip", operands);
12516 else if (regno_use_in (LR_REGNUM, operands[0]))
12518 /* LR is used in the memory address. We load the address in the
12519 first instruction. It's safe to use IP as the target of the
12520 load since the call will kill it anyway. */
12521 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12522 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12524 output_asm_insn ("bx%?\t%|ip", operands);
12526 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12530 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12531 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12538 /* Output a move from arm registers to an fpa register.
12539 OPERANDS[0] is an fpa register.
12540 OPERANDS[1] is the first register of an arm register pair. */
12542 output_mov_long_double_fpa_from_arm (rtx *operands)
12544 int arm_reg0 = REGNO (operands[1]);
12547 gcc_assert (arm_reg0 != IP_REGNUM);
12549 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12550 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12551 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12553 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12554 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12559 /* Output a move from an fpa register to arm registers.
12560 OPERANDS[0] is the first register of an arm register pair.
12561 OPERANDS[1] is an fpa register. */
12563 output_mov_long_double_arm_from_fpa (rtx *operands)
12565 int arm_reg0 = REGNO (operands[0]);
12568 gcc_assert (arm_reg0 != IP_REGNUM);
12570 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12571 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12572 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12574 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12575 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12579 /* Output a move from arm registers to arm registers of a long double.
12580 OPERANDS[0] is the destination.
12581 OPERANDS[1] is the source. */
12583 output_mov_long_double_arm_from_arm (rtx *operands)
12585 /* We have to be careful here because the two might overlap. */
12586 int dest_start = REGNO (operands[0]);
12587 int src_start = REGNO (operands[1]);
12591 if (dest_start < src_start)
12593 for (i = 0; i < 3; i++)
12595 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12596 ops[1] = gen_rtx_REG (SImode, src_start + i);
12597 output_asm_insn ("mov%?\t%0, %1", ops);
12602 for (i = 2; i >= 0; i--)
12604 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12605 ops[1] = gen_rtx_REG (SImode, src_start + i);
12606 output_asm_insn ("mov%?\t%0, %1", ops);
12614 arm_emit_movpair (rtx dest, rtx src)
12616 /* If the src is an immediate, simplify it. */
12617 if (CONST_INT_P (src))
12619 HOST_WIDE_INT val = INTVAL (src);
12620 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12621 if ((val >> 16) & 0x0000ffff)
12622 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12624 GEN_INT ((val >> 16) & 0x0000ffff));
12627 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12628 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
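/* For illustration: arm_emit_movpair (dest, GEN_INT (0x12345678)) first
   emits a SET of the low half (dest = 0x5678) and then a ZERO_EXTRACT
   SET that deposits 0x1234 into bits 16-31, matching the movw/movt
   idiom; when the high half is zero the second insn is skipped.  */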
12631 /* Output a move from arm registers to an fpa register.
12632 OPERANDS[0] is an fpa register.
12633 OPERANDS[1] is the first register of an arm register pair. */
12635 output_mov_double_fpa_from_arm (rtx *operands)
12637 int arm_reg0 = REGNO (operands[1]);
12640 gcc_assert (arm_reg0 != IP_REGNUM);
12642 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12643 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12644 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12645 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12649 /* Output a move from an fpa register to arm registers.
12650 OPERANDS[0] is the first register of an arm register pair.
12651 OPERANDS[1] is an fpa register. */
12653 output_mov_double_arm_from_fpa (rtx *operands)
12655 int arm_reg0 = REGNO (operands[0]);
12658 gcc_assert (arm_reg0 != IP_REGNUM);
12660 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12661 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12662 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12663 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12667 /* Output a move between double words.
12668 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12669 or MEM<-REG and all MEMs must be offsettable addresses. */
12671 output_move_double (rtx *operands)
12673 enum rtx_code code0 = GET_CODE (operands[0]);
12674 enum rtx_code code1 = GET_CODE (operands[1]);
12679 unsigned int reg0 = REGNO (operands[0]);
12681 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12683 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12685 switch (GET_CODE (XEXP (operands[1], 0)))
12689 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12690 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12692 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12696 gcc_assert (TARGET_LDRD);
12697 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12702 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12704 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12709 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12711 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12715 gcc_assert (TARGET_LDRD);
12716 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12721 /* Autoincrement addressing modes should never have overlapping
12722 base and destination registers, and overlapping index registers
12723 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
12725 otherops[0] = operands[0];
12726 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12727 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12729 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12731 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12733 /* Registers overlap so split out the increment. */
12734 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12735 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12739 /* Use a single insn if we can.
12740 FIXME: IWMMXT allows offsets larger than ldrd can
12741 handle, fix these up with a pair of ldr. */
12743 || GET_CODE (otherops[2]) != CONST_INT
12744 || (INTVAL (otherops[2]) > -256
12745 && INTVAL (otherops[2]) < 256))
12746 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12749 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12750 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12756 /* Use a single insn if we can.
12757 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12758 fix these up with a pair of ldr. */
12760 || GET_CODE (otherops[2]) != CONST_INT
12761 || (INTVAL (otherops[2]) > -256
12762 && INTVAL (otherops[2]) < 256))
12763 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12766 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12767 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12774 /* We might be able to use ldrd %0, %1 here. However the range is
12775 different to ldr/adr, and it is broken on some ARMv7-M
12776 implementations. */
12777 /* Use the second register of the pair to avoid problematic conditional execution. */
12779 otherops[1] = operands[1];
12780 output_asm_insn ("adr%?\t%0, %1", otherops);
12781 operands[1] = otherops[0];
12783 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12785 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12788 /* ??? This needs checking for thumb2. */
12790 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12791 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12793 otherops[0] = operands[0];
12794 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12795 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12797 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12799 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12801 switch ((int) INTVAL (otherops[2]))
12804 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12809 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12814 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12818 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
12819 operands[1] = otherops[0];
12821 && (GET_CODE (otherops[2]) == REG
12823 || (GET_CODE (otherops[2]) == CONST_INT
12824 && INTVAL (otherops[2]) > -256
12825 && INTVAL (otherops[2]) < 256)))
12827 if (reg_overlap_mentioned_p (operands[0],
12831 /* Swap base and index registers over to
12832 avoid a conflict. */
12834 otherops[1] = otherops[2];
12837 /* If both registers conflict, it will usually
12838 have been fixed by a splitter. */
12839 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12840 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12842 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12843 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12847 otherops[0] = operands[0];
12848 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12853 if (GET_CODE (otherops[2]) == CONST_INT)
12855 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12856 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12858 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12861 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12864 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12867 return "ldr%(d%)\t%0, [%1]";
12869 return "ldm%(ia%)\t%1, %M0";
12873 otherops[1] = adjust_address (operands[1], SImode, 4);
12874 /* Take care of overlapping base/data reg. */
12875 if (reg_mentioned_p (operands[0], operands[1]))
12877 output_asm_insn ("ldr%?\t%0, %1", otherops);
12878 output_asm_insn ("ldr%?\t%0, %1", operands);
12882 output_asm_insn ("ldr%?\t%0, %1", operands);
12883 output_asm_insn ("ldr%?\t%0, %1", otherops);
12890 /* Constraints should ensure this. */
12891 gcc_assert (code0 == MEM && code1 == REG);
12892 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12894 switch (GET_CODE (XEXP (operands[0], 0)))
12898 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12900 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12904 gcc_assert (TARGET_LDRD);
12905 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12910 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12912 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12917 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12919 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12923 gcc_assert (TARGET_LDRD);
12924 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12929 otherops[0] = operands[1];
12930 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12931 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12933 /* IWMMXT allows offsets larger than strd can handle,
12934 fix these up with a pair of str. */
12936 && GET_CODE (otherops[2]) == CONST_INT
12937 && (INTVAL (otherops[2]) <= -256
12938 || INTVAL (otherops[2]) >= 256))
12940 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12942 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
12943 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12947 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12948 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
12951 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12952 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12954 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12958 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12959 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12961 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12964 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12970 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12976 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12981 && (GET_CODE (otherops[2]) == REG
12983 || (GET_CODE (otherops[2]) == CONST_INT
12984 && INTVAL (otherops[2]) > -256
12985 && INTVAL (otherops[2]) < 256)))
12987 otherops[0] = operands[1];
12988 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12989 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12995 otherops[0] = adjust_address (operands[0], SImode, 4);
12996 otherops[1] = operands[1];
12997 output_asm_insn ("str%?\t%1, %0", operands);
12998 output_asm_insn ("str%?\t%H1, %0", otherops);
13005 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13006 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13009 output_move_quad (rtx *operands)
13011 if (REG_P (operands[0]))
13013 /* Load, or reg->reg move. */
13015 if (MEM_P (operands[1]))
13017 switch (GET_CODE (XEXP (operands[1], 0)))
13020 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13025 output_asm_insn ("adr%?\t%0, %1", operands);
13026 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13030 gcc_unreachable ();
13038 gcc_assert (REG_P (operands[1]));
13040 dest = REGNO (operands[0]);
13041 src = REGNO (operands[1]);
13043 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
13046 for (i = 0; i < 4; i++)
13048 ops[0] = gen_rtx_REG (SImode, dest + i);
13049 ops[1] = gen_rtx_REG (SImode, src + i);
13050 output_asm_insn ("mov%?\t%0, %1", ops);
13053 for (i = 3; i >= 0; i--)
13055 ops[0] = gen_rtx_REG (SImode, dest + i);
13056 ops[1] = gen_rtx_REG (SImode, src + i);
13057 output_asm_insn ("mov%?\t%0, %1", ops);
13063 gcc_assert (MEM_P (operands[0]));
13064 gcc_assert (REG_P (operands[1]));
13065 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13067 switch (GET_CODE (XEXP (operands[0], 0)))
13070 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13074 gcc_unreachable ();
13081 /* Output a VFP load or store instruction. */
13084 output_move_vfp (rtx *operands)
13086 rtx reg, mem, addr, ops[2];
13087 int load = REG_P (operands[0]);
13088 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13089 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13092 enum machine_mode mode;
13094 reg = operands[!load];
13095 mem = operands[load];
13097 mode = GET_MODE (reg);
13099 gcc_assert (REG_P (reg));
13100 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13101 gcc_assert (mode == SFmode
13105 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13106 gcc_assert (MEM_P (mem));
13108 addr = XEXP (mem, 0);
13110 switch (GET_CODE (addr))
13113 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13114 ops[0] = XEXP (addr, 0);
13119 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13120 ops[0] = XEXP (addr, 0);
13125 templ = "f%s%c%%?\t%%%s0, %%1%s";
13131 sprintf (buff, templ,
13132 load ? "ld" : "st",
13135 integer_p ? "\t%@ int" : "");
13136 output_asm_insn (buff, ops);
13141 /* Output a Neon quad-word load or store, or a load or store for
13142 larger structure modes.
13144 WARNING: The ordering of elements is weird in big-endian mode,
13145 because we use VSTM, as required by the EABI. GCC RTL defines
13146 element ordering based on in-memory order. This can differ
13147 from the architectural ordering of elements within a NEON register.
13148 The intrinsics defined in arm_neon.h use the NEON register element
13149 ordering, not the GCC RTL element ordering.
13151 For example, the in-memory ordering of a big-endian quadword
13152 vector with 16-bit elements when stored from register pair {d0,d1}
13153 will be (lowest address first, d0[N] is NEON register element N):
13155 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13157 When necessary, quadword registers (dN, dN+1) are moved to ARM
13158 registers from rN in the order:
13160 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13162 So that STM/LDM can be used on vectors in ARM registers, and the
13163 same memory layout will result as if VSTM/VLDM were used. */
13166 output_move_neon (rtx *operands)
13168 rtx reg, mem, addr, ops[2];
13169 int regno, load = REG_P (operands[0]);
13172 enum machine_mode mode;
13174 reg = operands[!load];
13175 mem = operands[load];
13177 mode = GET_MODE (reg);
13179 gcc_assert (REG_P (reg));
13180 regno = REGNO (reg);
13181 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13182 || NEON_REGNO_OK_FOR_QUAD (regno));
13183 gcc_assert (VALID_NEON_DREG_MODE (mode)
13184 || VALID_NEON_QREG_MODE (mode)
13185 || VALID_NEON_STRUCT_MODE (mode));
13186 gcc_assert (MEM_P (mem));
13188 addr = XEXP (mem, 0);
13190 /* Strip off const from addresses like (const (plus (...))). */
13191 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13192 addr = XEXP (addr, 0);
13194 switch (GET_CODE (addr))
13197 templ = "v%smia%%?\t%%0!, %%h1";
13198 ops[0] = XEXP (addr, 0);
13203 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13204 templ = "v%smdb%%?\t%%0!, %%h1";
13205 ops[0] = XEXP (addr, 0);
13210 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13211 gcc_unreachable ();
13216 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13219 for (i = 0; i < nregs; i++)
13221 /* We're only using DImode here because it's a convenient size. */
13222 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13223 ops[1] = adjust_address (mem, DImode, 8 * i);
13224 if (reg_overlap_mentioned_p (ops[0], mem))
13226 gcc_assert (overlap == -1);
13231 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13232 output_asm_insn (buff, ops);
13237 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13238 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13239 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13240 output_asm_insn (buff, ops);
13247 templ = "v%smia%%?\t%%m0, %%h1";
13252 sprintf (buff, templ, load ? "ld" : "st");
13253 output_asm_insn (buff, ops);
13258 /* Compute and return the length of neon_mov<mode>, where <mode> is
13259 one of VSTRUCT modes: EI, OI, CI or XI. */
13261 arm_attr_length_move_neon (rtx insn)
13263 rtx reg, mem, addr;
13265 enum machine_mode mode;
13267 extract_insn_cached (insn);
13269 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13271 mode = GET_MODE (recog_data.operand[0]);
13282 gcc_unreachable ();
13286 load = REG_P (recog_data.operand[0]);
13287 reg = recog_data.operand[!load];
13288 mem = recog_data.operand[load];
13290 gcc_assert (MEM_P (mem));
13292 mode = GET_MODE (reg);
13293 addr = XEXP (mem, 0);
13295 /* Strip off const from addresses like (const (plus (...))). */
13296 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13297 addr = XEXP (addr, 0);
13299 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13301 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13308 /* Output an ADD r, s, #n where n may be too big for one instruction.
13309 If adding zero to one register, output nothing. */
13311 output_add_immediate (rtx *operands)
13313 HOST_WIDE_INT n = INTVAL (operands[2]);
13315 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13318 output_multi_immediate (operands,
13319 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13322 output_multi_immediate (operands,
13323 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13330 /* Output a multiple immediate operation.
13331 OPERANDS is the vector of operands referred to in the output patterns.
13332 INSTR1 is the output pattern to use for the first constant.
13333 INSTR2 is the output pattern to use for subsequent constants.
13334 IMMED_OP is the index of the constant slot in OPERANDS.
13335 N is the constant value. */
13336 static const char *
13337 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13338 int immed_op, HOST_WIDE_INT n)
13340 #if HOST_BITS_PER_WIDE_INT > 32
13346 /* Quick and easy output. */
13347 operands[immed_op] = const0_rtx;
13348 output_asm_insn (instr1, operands);
13353 const char * instr = instr1;
13355 /* Note that n is never zero here (which would give no output). */
13356 for (i = 0; i < 32; i += 2)
13360 operands[immed_op] = GEN_INT (n & (255 << i));
13361 output_asm_insn (instr, operands);
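/* Worked example: output_add_immediate with n == 0x10004 reaches the
   loop above and emits one instruction per non-empty 255 << i chunk,
   here "add %0, %1, #4" followed by "add %0, %0, #65536", each chunk
   being a valid rotated 8-bit ARM immediate.  */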
13371 /* Return the name of a shifter operation. */
13372 static const char *
13373 arm_shift_nmem (enum rtx_code code)
13378 return ARM_LSL_NAME;
13394 /* Return the appropriate ARM instruction for the operation code.
13395 The returned result should not be overwritten. OP is the rtx of the
13396 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13399 arithmetic_instr (rtx op, int shift_first_arg)
13401 switch (GET_CODE (op))
13407 return shift_first_arg ? "rsb" : "sub";
13422 return arm_shift_nmem (GET_CODE (op));
13425 gcc_unreachable ();
13429 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13430 for the operation code. The returned result should not be overwritten.
13431 OP is the rtx code of the shift.
13432 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant shift amount otherwise. */
13434 static const char *
13435 shift_op (rtx op, HOST_WIDE_INT *amountp)
13438 enum rtx_code code = GET_CODE (op);
13440 switch (GET_CODE (XEXP (op, 1)))
13448 *amountp = INTVAL (XEXP (op, 1));
13452 gcc_unreachable ();
13458 gcc_assert (*amountp != -1);
13459 *amountp = 32 - *amountp;
13462 /* Fall through. */
13468 mnem = arm_shift_nmem (code);
13472 /* We never have to worry about the amount being other than a
13473 power of 2, since this case can never be reloaded from a reg. */
13474 gcc_assert (*amountp != -1);
13475 *amountp = int_log2 (*amountp);
13476 return ARM_LSL_NAME;
13479 gcc_unreachable ();
13482 if (*amountp != -1)
13484 /* This is not 100% correct, but follows from the desire to merge
13485 multiplication by a power of 2 with the recognizer for a
13486 shift. >=32 is not a valid shift for "lsl", so we must try and
13487 output a shift that produces the correct arithmetical result.
13488 Using lsr #32 is identical except for the fact that the carry bit
13489 is not set correctly if we set the flags; but we never use the
13490 carry bit from such an operation, so we can ignore that. */
13491 if (code == ROTATERT)
13492 /* Rotate is just modulo 32. */
13494 else if (*amountp != (*amountp & 31))
13496 if (code == ASHIFT)
13501 /* Shifts of 0 are no-ops. */
13509 /* Obtain the shift from the POWER of two. */
13511 static HOST_WIDE_INT
13512 int_log2 (HOST_WIDE_INT power)
13514 HOST_WIDE_INT shift = 0;
13516 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13518 gcc_assert (shift <= 31);
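/* E.g. int_log2 (8) == 3: the loop walks up from bit 0 until it hits
   the set bit, and the gcc_assert above catches a zero POWER argument
   before the shift count can run past 31.  */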
13525 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13526 because /bin/as is horribly restrictive. The judgement about
13527 whether or not each character is 'printable' (and can be output as
13528 is) or not (and must be printed with an octal escape) must be made
13529 with reference to the *host* character set -- the situation is
13530 similar to that discussed in the comments above pp_c_char in
13531 c-pretty-print.c. */
13533 #define MAX_ASCII_LEN 51
13536 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13539 int len_so_far = 0;
13541 fputs ("\t.ascii\t\"", stream);
13543 for (i = 0; i < len; i++)
13547 if (len_so_far >= MAX_ASCII_LEN)
13549 fputs ("\"\n\t.ascii\t\"", stream);
13555 if (c == '\\' || c == '\"')
13557 putc ('\\', stream);
13565 fprintf (stream, "\\%03o", c);
13570 fputs ("\"\n", stream);
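/* For example, the three characters a, '"' and b come out as
	.ascii	"a\"b"
   a non-printable byte such as 0x07 is emitted as the octal escape
   \007, and runs longer than MAX_ASCII_LEN are split across several
   .ascii directives.  */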
13573 /* Compute the register save mask for registers 0 through 12
13574 inclusive. This code is used by arm_compute_save_reg_mask. */
13576 static unsigned long
13577 arm_compute_save_reg0_reg12_mask (void)
13579 unsigned long func_type = arm_current_func_type ();
13580 unsigned long save_reg_mask = 0;
13583 if (IS_INTERRUPT (func_type))
13585 unsigned int max_reg;
13586 /* Interrupt functions must not corrupt any registers,
13587 even call clobbered ones. If this is a leaf function
13588 we can just examine the registers used by the RTL, but
13589 otherwise we have to assume that whatever function is
13590 called might clobber anything, and so we have to save
13591 all the call-clobbered registers as well. */
13592 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13593 /* FIQ handlers have registers r8 - r12 banked, so
13594 we only need to check r0 - r7. Normal ISRs only
13595 bank r14 and r15, so we must check up to r12.
13596 r13 is the stack pointer which is always preserved,
13597 so we do not need to consider it here. */
13602 for (reg = 0; reg <= max_reg; reg++)
13603 if (df_regs_ever_live_p (reg)
13604 || (! current_function_is_leaf && call_used_regs[reg]))
13605 save_reg_mask |= (1 << reg);
13607 /* Also save the pic base register if necessary. */
13609 && !TARGET_SINGLE_PIC_BASE
13610 && arm_pic_register != INVALID_REGNUM
13611 && crtl->uses_pic_offset_table)
13612 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13614 else if (IS_VOLATILE(func_type))
13616 /* For noreturn functions we historically omitted register saves
13617 altogether. However this really messes up debugging. As a
13618 compromise save just the frame pointers. Combined with the link
13619 register saved elsewhere this should be sufficient to get a backtrace. */
13621 if (frame_pointer_needed)
13622 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13623 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13624 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13625 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13626 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13630 /* In the normal case we only need to save those registers
13631 which are call saved and which are used by this function. */
13632 for (reg = 0; reg <= 11; reg++)
13633 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13634 save_reg_mask |= (1 << reg);
13636 /* Handle the frame pointer as a special case. */
13637 if (frame_pointer_needed)
13638 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13640 /* If we aren't loading the PIC register,
13641 don't stack it even though it may be live. */
13643 && !TARGET_SINGLE_PIC_BASE
13644 && arm_pic_register != INVALID_REGNUM
13645 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13646 || crtl->uses_pic_offset_table))
13647 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13649 /* The prologue will copy SP into R0, so save it. */
13650 if (IS_STACKALIGN (func_type))
13651 save_reg_mask |= 1;
13654 /* Save registers so the exception handler can modify them. */
13655 if (crtl->calls_eh_return)
13661 reg = EH_RETURN_DATA_REGNO (i);
13662 if (reg == INVALID_REGNUM)
13664 save_reg_mask |= 1 << reg;
13668 return save_reg_mask;
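/* For illustration: in a normal function where r4, r6 and r7 are the
   only live call-saved core registers (no PIC base, no EH return
   data), the code above computes (1 << 4) | (1 << 6) | (1 << 7),
   i.e. 0xd0.  */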
13672 /* Compute the number of bytes used to store the static chain register on the
13673 stack, above the stack frame. We need to know this accurately to get the
13674 alignment of the rest of the stack frame correct. */
13676 static int arm_compute_static_chain_stack_bytes (void)
13678 unsigned long func_type = arm_current_func_type ();
13679 int static_chain_stack_bytes = 0;
13681 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13682 && IS_NESTED (func_type)
13683 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13684 static_chain_stack_bytes = 4;
13686 return static_chain_stack_bytes;
13690 /* Compute a bit mask of which registers need to be
13691 saved on the stack for the current function.
13692 This is used by arm_get_frame_offsets, which may add extra registers. */
13694 static unsigned long
13695 arm_compute_save_reg_mask (void)
13697 unsigned int save_reg_mask = 0;
13698 unsigned long func_type = arm_current_func_type ();
13701 if (IS_NAKED (func_type))
13702 /* This should never really happen. */
13705 /* If we are creating a stack frame, then we must save the frame pointer,
13706 IP (which will hold the old stack pointer), LR and the PC. */
13707 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13709 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13712 | (1 << PC_REGNUM);
13714 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13716 /* Decide if we need to save the link register.
13717 Interrupt routines have their own banked link register,
13718 so they never need to save it.
13719 Otherwise if we do not use the link register we do not need to save
13720 it. If we are pushing other registers onto the stack however, we
13721 can save an instruction in the epilogue by pushing the link register
13722 now and then popping it back into the PC. This incurs extra memory
13723 accesses though, so we only do it when optimizing for size, and only
13724 if we know that we will not need a fancy return sequence. */
13725 if (df_regs_ever_live_p (LR_REGNUM)
13728 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13729 && !crtl->calls_eh_return))
13730 save_reg_mask |= 1 << LR_REGNUM;
13732 if (cfun->machine->lr_save_eliminated)
13733 save_reg_mask &= ~ (1 << LR_REGNUM);
13735 if (TARGET_REALLY_IWMMXT
13736 && ((bit_count (save_reg_mask)
13737 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13738 arm_compute_static_chain_stack_bytes())
13741 /* The total number of registers that are going to be pushed
13742 onto the stack is odd. We need to ensure that the stack
13743 is 64-bit aligned before we start to save iWMMXt registers,
13744 and also before we start to create locals. (A local variable
13745 might be a double or long long which we will load/store using
13746 an iWMMXt instruction). Therefore we need to push another
13747 ARM register, so that the stack will be 64-bit aligned. We
13748 try to avoid using the arg registers (r0 - r3) as they might be
13749 used to pass values in a tail call. */
13750 for (reg = 4; reg <= 12; reg++)
13751 if ((save_reg_mask & (1 << reg)) == 0)
13755 save_reg_mask |= (1 << reg);
13758 cfun->machine->sibcall_blocked = 1;
13759 save_reg_mask |= (1 << 3);
13763 /* We may need to push an additional register for use initializing the
13764 PIC base register. */
13765 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13766 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13768 reg = thumb_find_work_register (1 << 4);
13769 if (!call_used_regs[reg])
13770 save_reg_mask |= (1 << reg);
13773 return save_reg_mask;
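/* For illustration: with iWMMXt enabled, if the mask so far covers
   {r4, r5, r6} and there are no pretend args or static chain bytes,
   three words would be pushed -- an odd count, so the stack would
   lose its 64-bit alignment.  The code above then adds r7, making the
   push four registers (16 bytes) and restoring the alignment.  */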
13777 /* Compute a bit mask of which registers need to be
13778 saved on the stack for the current function. */
13779 static unsigned long
13780 thumb1_compute_save_reg_mask (void)
13782 unsigned long mask;
13786 for (reg = 0; reg < 12; reg ++)
13787 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13791 if (flag_pic && !TARGET_SINGLE_PIC_BASE
13792 && arm_pic_register != INVALID_REGNUM
13793 && crtl->uses_pic_offset_table)
13794 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13796 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13797 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13798 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13800 /* LR will also be pushed if any lo regs are pushed. */
13801 if (mask & 0xff || thumb_force_lr_save ())
13802 mask |= (1 << LR_REGNUM);
13804 /* Make sure we have a low work register if we need one.
13805 We will need one if we are going to push a high register,
13806 but we are not currently intending to push a low register. */
13807 if ((mask & 0xff) == 0
13808 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13810 /* Use thumb_find_work_register to choose which register
13811 we will use. If the register is live then we will
13812 have to push it. Use LAST_LO_REGNUM as our fallback
13813 choice for the register to select. */
13814 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13815 /* Make sure the register returned by thumb_find_work_register is
13816 not part of the return value. */
13817 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13818 reg = LAST_LO_REGNUM;
13820 if (! call_used_regs[reg])
13824 /* The 504 below is 8 bytes less than 512 because there are two possible
13825 alignment words. We can't tell here if they will be present or not so we
13826 have to play it safe and assume that they are. */
13827 if ((CALLER_INTERWORKING_SLOT_SIZE +
13828 ROUND_UP_WORD (get_frame_size ()) +
13829 crtl->outgoing_args_size) >= 504)
13831 /* This is the same as the code in thumb1_expand_prologue() which
13832 determines which register to use for stack decrement. */
13833 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13834 if (mask & (1 << reg))
13837 if (reg > LAST_LO_REGNUM)
13839 /* Make sure we have a register available for stack decrement. */
13840 mask |= 1 << LAST_LO_REGNUM;
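/* For illustration, the arithmetic behind the 504 used above:
   512 - 2 * 4 == 504, so a frame that might grow to the prologue's
   512-byte cutoff once both possible alignment words are inserted is
   already treated here as needing the register-based stack
   decrement.  */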
13848 /* Return the number of bytes required to save VFP registers. */
13850 arm_get_vfp_saved_size (void)
13852 unsigned int regno;
13857 /* Space for saved VFP registers. */
13858 if (TARGET_HARD_FLOAT && TARGET_VFP)
13861 for (regno = FIRST_VFP_REGNUM;
13862 regno < LAST_VFP_REGNUM;
13865 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13866 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13870 /* Workaround ARM10 VFPr1 bug. */
13871 if (count == 2 && !arm_arch6)
13873 saved += count * 8;
13882 if (count == 2 && !arm_arch6)
13884 saved += count * 8;
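/* For illustration: if d8-d11 are the live call-saved VFP registers,
   COUNT reaches 4 and 4 * 8 == 32 bytes are reserved.  On pre-v6
   cores the ARM10 VFPr1 workaround above widens a run of exactly two
   registers to three, i.e. 24 bytes rather than 16.  */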
13891 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13892 everything bar the final return instruction. */
13894 const char * output_return_instruction (rtx operand, int really_return, int reverse)
13896 char conditional[10];
13899 unsigned long live_regs_mask;
13900 unsigned long func_type;
13901 arm_stack_offsets *offsets;
13903 func_type = arm_current_func_type ();
13905 if (IS_NAKED (func_type))
13908 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13910 /* If this function was declared non-returning, and we have
13911 found a tail call, then we have to trust that the called
13912 function won't return. */
13917 /* Otherwise, trap an attempted return by aborting. */
13919 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13921 assemble_external_libcall (ops[1]);
13922 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13928 gcc_assert (!cfun->calls_alloca || really_return);
13930 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13932 cfun->machine->return_used_this_function = 1;
13934 offsets = arm_get_frame_offsets ();
13935 live_regs_mask = offsets->saved_regs_mask;
13937 if (live_regs_mask)
13939 const char * return_reg;
13941 /* If we do not have any special requirements for function exit
13942 (e.g. interworking) then we can load the return address
13943 directly into the PC. Otherwise we must load it into LR. */
13945 if (really_return && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13946 return_reg = reg_names[PC_REGNUM];
13948 else return_reg = reg_names[LR_REGNUM];
13950 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13952 /* There are three possible reasons for the IP register
13953 being saved. 1) a stack frame was created, in which case
13954 IP contains the old stack pointer, or 2) an ISR routine
13955 corrupted it, or 3) it was saved to align the stack on
13956 iWMMXt. In case 1, restore IP into SP, otherwise just pop it. */
13958 if (frame_pointer_needed)
13960 live_regs_mask &= ~ (1 << IP_REGNUM);
13961 live_regs_mask |= (1 << SP_REGNUM);
13964 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13967 /* On some ARM architectures it is faster to use LDR rather than
13968 LDM to load a single register. On other architectures, the
13969 cost is the same. In 26 bit mode, or for exception handlers,
13970 we have to use LDM to load the PC so that the CPSR is also restored. */
13972 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13973 if (live_regs_mask == (1U << reg))
13976 if (reg <= LAST_ARM_REGNUM
13977 && (reg != LR_REGNUM
13979 || ! IS_INTERRUPT (func_type)))
13981 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13982 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13989 /* Generate the load multiple instruction to restore the
13990 registers. Note we can get here, even if
13991 frame_pointer_needed is true, but only if sp already
13992 points to the base of the saved core registers. */
13993 if (live_regs_mask & (1 << SP_REGNUM))
13995 unsigned HOST_WIDE_INT stack_adjust;
13997 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13998 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14000 if (stack_adjust && arm_arch5 && TARGET_ARM)
14001 if (TARGET_UNIFIED_ASM)
14002 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14004 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14007 /* If we can't use ldmib (SA110 bug),
14008 then try to pop r3 instead. */
14010 live_regs_mask |= 1 << 3;
14012 if (TARGET_UNIFIED_ASM)
14013 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14015 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14019 if (TARGET_UNIFIED_ASM)
14020 sprintf (instr, "pop%s\t{", conditional);
14022 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14024 p = instr + strlen (instr);
14026 for (reg = 0; reg <= SP_REGNUM; reg++)
14027 if (live_regs_mask & (1 << reg))
14029 int l = strlen (reg_names[reg]);
14035 memcpy (p, ", ", 2);
14039 memcpy (p, "%|", 2);
14040 memcpy (p + 2, reg_names[reg], l);
14044 if (live_regs_mask & (1 << LR_REGNUM))
14046 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14047 /* If returning from an interrupt, restore the CPSR. */
14048 if (IS_INTERRUPT (func_type))
14055 output_asm_insn (instr, & operand);
14057 /* See if we need to generate an extra instruction to
14058 perform the actual function return. */
14060 if (really_return && func_type != ARM_FT_INTERWORKED
14061 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14063 /* The return has already been handled
14064 by loading the LR into the PC. */
14071 switch ((int) ARM_FUNC_TYPE (func_type))
14075 /* ??? This is wrong for unified assembly syntax. */
14076 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14079 case ARM_FT_INTERWORKED:
14080 sprintf (instr, "bx%s\t%%|lr", conditional);
14083 case ARM_FT_EXCEPTION:
14084 /* ??? This is wrong for unified assembly syntax. */
14085 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14089 /* Use bx if it's available. */
14090 if (arm_arch5 || arm_arch4t)
14091 sprintf (instr, "bx%s\t%%|lr", conditional);
14093 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14097 output_asm_insn (instr, & operand);
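/* For illustration, typical sequences emitted by the code above for a
   normal ARM-mode function without interworking:

	ldr	pc, [sp], #4		@ only LR was saved

	ldmfd	sp!, {r4, pc}		@ r4 and LR were saved

   In each case the saved return address is loaded straight into the
   PC, so no separate return instruction is needed.  */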
14103 /* Write the function name into the code section, directly preceding
14104 the function prologue.
14106 Code will be output similar to this:
14107 t0
14108 .ascii "arm_poke_function_name", 0
14109 .align
14110 t1
14111 .word 0xff000000 + (t1 - t0)
14112 arm_poke_function_name
14113 mov ip, sp
14114 stmfd sp!, {fp, ip, lr, pc}
14115 sub fp, ip, #4
14117 When performing a stack backtrace, code can inspect the value
14118 of 'pc' stored at 'fp' + 0. If the trace function then looks
14119 at location pc - 12 and the top 8 bits are set, then we know
14120 that there is a function name embedded immediately preceding this
14121 location, whose length is given by ((pc[-3]) & ~0xff000000).
14123 We assume that pc is declared as a pointer to an unsigned long.
14125 It is of no benefit to output the function name if we are assembling
14126 a leaf function. These function types will not contain a stack
14127 backtrace structure; therefore it is not possible to determine the function name. */
14130 void arm_poke_function_name (FILE *stream, const char *name)
14132 unsigned long alignlength;
14133 unsigned long length;
14136 length = strlen (name) + 1;
14137 alignlength = ROUND_UP_WORD (length);
14139 ASM_OUTPUT_ASCII (stream, name, length);
14140 ASM_OUTPUT_ALIGN (stream, 2);
14141 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14142 assemble_aligned_integer (UNITS_PER_WORD, x);
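/* For illustration: for NAME == "foo", LENGTH is 4 (three characters
   plus the NUL) and ALIGNLENGTH rounds to 4, so the code above emits
   roughly

	.ascii	"foo\000"
	.align	2
	.word	0xff000004

   and a backtracer that sees the 0xff marker can recover the name's
   length from the low 24 bits of the word.  */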
14145 /* Place some comments into the assembler stream
14146 describing the current function. */
14148 static void arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14150 unsigned long func_type;
14154 thumb1_output_function_prologue (f, frame_size);
14158 /* Sanity check. */
14159 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14161 func_type = arm_current_func_type ();
14163 switch ((int) ARM_FUNC_TYPE (func_type))
14166 case ARM_FT_NORMAL:
14168 case ARM_FT_INTERWORKED:
14169 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14172 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14175 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14177 case ARM_FT_EXCEPTION:
14178 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14182 if (IS_NAKED (func_type))
14183 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14185 if (IS_VOLATILE (func_type))
14186 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14188 if (IS_NESTED (func_type))
14189 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14190 if (IS_STACKALIGN (func_type))
14191 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14193 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14195 crtl->args.pretend_args_size, frame_size);
14197 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14198 frame_pointer_needed,
14199 cfun->machine->uses_anonymous_args);
14201 if (cfun->machine->lr_save_eliminated)
14202 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14204 if (crtl->calls_eh_return)
14205 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14210 const char * arm_output_epilogue (rtx sibling)
14213 unsigned long saved_regs_mask;
14214 unsigned long func_type;
14215 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14216 frame that is $fp + 4 for a non-variadic function. */
14217 int floats_offset = 0;
14219 FILE * f = asm_out_file;
14220 unsigned int lrm_count = 0;
14221 int really_return = (sibling == NULL);
14223 arm_stack_offsets *offsets;
14225 /* If we have already generated the return instruction
14226 then it is futile to generate anything else. */
14227 if (use_return_insn (FALSE, sibling)
14228 && (cfun->machine->return_used_this_function != 0))
14231 func_type = arm_current_func_type ();
14233 if (IS_NAKED (func_type))
14234 /* Naked functions don't have epilogues. */
14237 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14241 /* A volatile function should never return. Call abort. */
14242 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14243 assemble_external_libcall (op);
14244 output_asm_insn ("bl\t%a0", &op);
14249 /* If we are throwing an exception, then we really must be doing a
14250 return, so we can't tail-call. */
14251 gcc_assert (!crtl->calls_eh_return || really_return);
14253 offsets = arm_get_frame_offsets ();
14254 saved_regs_mask = offsets->saved_regs_mask;
14257 lrm_count = bit_count (saved_regs_mask);
14259 floats_offset = offsets->saved_args;
14260 /* Compute how far away the floats will be. */
14261 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14262 if (saved_regs_mask & (1 << reg))
14263 floats_offset += 4;
14265 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14267 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14268 int vfp_offset = offsets->frame;
14270 if (TARGET_FPA_EMU2)
14272 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14273 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14275 floats_offset += 12;
14276 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14277 reg, FP_REGNUM, floats_offset - vfp_offset);
14282 start_reg = LAST_FPA_REGNUM;
14284 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14286 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14288 floats_offset += 12;
14290 /* We can't unstack more than four registers at once. */
14291 if (start_reg - reg == 3)
14293 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14294 reg, FP_REGNUM, floats_offset - vfp_offset);
14295 start_reg = reg - 1;
14300 if (reg != start_reg)
14301 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14302 reg + 1, start_reg - reg,
14303 FP_REGNUM, floats_offset - vfp_offset);
14304 start_reg = reg - 1;
14308 /* Just in case the last register checked also needs unstacking. */
14309 if (reg != start_reg)
14310 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14311 reg + 1, start_reg - reg,
14312 FP_REGNUM, floats_offset - vfp_offset);
14315 if (TARGET_HARD_FLOAT && TARGET_VFP)
14319 /* The fldmd insns do not have base+offset addressing
14320 modes, so we use IP to hold the address. */
14321 saved_size = arm_get_vfp_saved_size ();
14323 if (saved_size > 0)
14325 floats_offset += saved_size;
14326 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14327 FP_REGNUM, floats_offset - vfp_offset);
14329 start_reg = FIRST_VFP_REGNUM;
14330 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14332 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14333 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14335 if (start_reg != reg)
14336 vfp_output_fldmd (f, IP_REGNUM,
14337 (start_reg - FIRST_VFP_REGNUM) / 2,
14338 (reg - start_reg) / 2);
14339 start_reg = reg + 2;
14342 if (start_reg != reg)
14343 vfp_output_fldmd (f, IP_REGNUM,
14344 (start_reg - FIRST_VFP_REGNUM) / 2,
14345 (reg - start_reg) / 2);
14350 /* The frame pointer is guaranteed to be non-double-word aligned.
14351 This is because it is set to (old_stack_pointer - 4) and the
14352 old_stack_pointer was double word aligned. Thus the offset to
14353 the iWMMXt registers to be loaded must also be non-double-word
14354 sized, so that the resultant address *is* double-word aligned.
14355 We can ignore floats_offset since that was already included in
14356 the live_regs_mask. */
14357 lrm_count += (lrm_count % 2 ? 2 : 1);
14359 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14360 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14362 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14363 reg, FP_REGNUM, lrm_count * 4);
14368 /* saved_regs_mask should contain the IP, which at the time of stack
14369 frame generation actually contains the old stack pointer. So a
14370 quick way to unwind the stack is just pop the IP register directly
14371 into the stack pointer. */
14372 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14373 saved_regs_mask &= ~ (1 << IP_REGNUM);
14374 saved_regs_mask |= (1 << SP_REGNUM);
14376 /* There are two registers left in saved_regs_mask - LR and PC. We
14377 only need to restore the LR register (the return address), but to
14378 save time we can load it directly into the PC, unless we need a
14379 special function exit sequence, or we are not really returning. */
14381 if (really_return && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14382 && !crtl->calls_eh_return)
14383 /* Delete the LR from the register mask, so that the LR on
14384 the stack is loaded into the PC in the register mask. */
14385 saved_regs_mask &= ~ (1 << LR_REGNUM);
14387 else saved_regs_mask &= ~ (1 << PC_REGNUM);
14389 /* We must use SP as the base register, because SP is one of the
14390 registers being restored. If an interrupt or page fault
14391 happens in the ldm instruction, the SP might or might not
14392 have been restored. That would be bad, as then SP will no
14393 longer indicate the safe area of stack, and we can get stack
14394 corruption. Using SP as the base register means that it will
14395 be reset correctly to the original value, should an interrupt
14396 occur. If the stack pointer already points at the right
14397 place, then omit the subtraction. */
14398 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14399 || cfun->calls_alloca)
14400 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14401 4 * bit_count (saved_regs_mask));
14402 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14404 if (IS_INTERRUPT (func_type))
14405 /* Interrupt handlers will have pushed the
14406 IP onto the stack, so restore it now. */
14407 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14411 /* This branch is executed for ARM mode (non-apcs frames) and
14412 Thumb-2 mode. Frame layout is essentially the same for those
14414 cases, except that in ARM mode the frame pointer points to the
14414 first saved register, while in Thumb-2 mode the frame pointer points
14415 to the last saved register.
14417 It is possible to make frame pointer point to last saved
14418 register in both cases, and remove some conditionals below.
14419 That means that fp setup in prologue would be just "mov fp, sp"
14420 and sp restore in epilogue would be just "mov sp, fp", whereas
14421 now we have to use add/sub in those cases. However, the value
14422 of that would be marginal, as both mov and add/sub are 32-bit
14423 in ARM mode, and it would require extra conditionals
14424 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14425 (where the frame pointer is required to point at the first register)
14426 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
14427 until a real need arises. */
14428 unsigned HOST_WIDE_INT amount;
14430 /* Restore stack pointer if necessary. */
14431 if (TARGET_ARM && frame_pointer_needed)
14433 operands[0] = stack_pointer_rtx;
14434 operands[1] = hard_frame_pointer_rtx;
14436 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14437 output_add_immediate (operands);
14441 if (frame_pointer_needed)
14443 /* For Thumb-2 restore sp from the frame pointer.
14444 Operand restrictions mean we have to increment FP, then copy it to SP. */
14446 amount = offsets->locals_base - offsets->saved_regs;
14447 operands[0] = hard_frame_pointer_rtx;
14451 unsigned long count;
14452 operands[0] = stack_pointer_rtx;
14453 amount = offsets->outgoing_args - offsets->saved_regs;
14454 /* Pop call-clobbered registers if it avoids a
14455 separate stack adjustment. */
14456 count = offsets->saved_regs - offsets->saved_args;
14459 && !crtl->calls_eh_return
14460 && bit_count(saved_regs_mask) * 4 == count
14461 && !IS_INTERRUPT (func_type)
14462 && !crtl->tail_call_emit)
14464 unsigned long mask;
14465 mask = (1 << (arm_size_return_regs() / 4)) - 1;
14467 mask &= ~saved_regs_mask;
14469 while (bit_count (mask) * 4 > amount)
14471 while ((mask & (1 << reg)) == 0)
14473 mask &= ~(1 << reg);
14475 if (bit_count (mask) * 4 == amount) {
14477 saved_regs_mask |= mask;
14484 operands[1] = operands[0];
14485 operands[2] = GEN_INT (amount);
14486 output_add_immediate (operands);
14488 if (frame_pointer_needed)
14489 asm_fprintf (f, "\tmov\t%r, %r\n",
14490 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14493 if (TARGET_FPA_EMU2)
14495 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14496 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14497 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14502 start_reg = FIRST_FPA_REGNUM;
14504 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14506 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14508 if (reg - start_reg == 3)
14510 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14511 start_reg, SP_REGNUM);
14512 start_reg = reg + 1;
14517 if (reg != start_reg)
14518 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14519 start_reg, reg - start_reg,
14522 start_reg = reg + 1;
14526 /* Just in case the last register checked also needs unstacking. */
14527 if (reg != start_reg)
14528 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14529 start_reg, reg - start_reg, SP_REGNUM);
14532 if (TARGET_HARD_FLOAT && TARGET_VFP)
14534 int end_reg = LAST_VFP_REGNUM + 1;
14536 /* Scan the registers in reverse order. We need to match
14537 any groupings made in the prologue and generate matching pops. */
14539 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14541 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14542 && (!df_regs_ever_live_p (reg + 1)
14543 || call_used_regs[reg + 1]))
14545 if (end_reg > reg + 2)
14546 vfp_output_fldmd (f, SP_REGNUM,
14547 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14548 (end_reg - (reg + 2)) / 2);
14552 if (end_reg > reg + 2)
14553 vfp_output_fldmd (f, SP_REGNUM, 0,
14554 (end_reg - (reg + 2)) / 2);
14558 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14559 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14560 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14562 /* If we can, restore the LR into the PC. */
14563 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14564 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14565 && !IS_STACKALIGN (func_type)
14567 && crtl->args.pretend_args_size == 0
14568 && saved_regs_mask & (1 << LR_REGNUM)
14569 && !crtl->calls_eh_return)
14571 saved_regs_mask &= ~ (1 << LR_REGNUM);
14572 saved_regs_mask |= (1 << PC_REGNUM);
14573 rfe = IS_INTERRUPT (func_type);
14578 /* Load the registers off the stack. If we only have one register
14579 to load use the LDR instruction - it is faster. For Thumb-2
14580 always use pop and the assembler will pick the best instruction. */
14581 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14582 && !IS_INTERRUPT(func_type))
14584 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14586 else if (saved_regs_mask)
14588 if (saved_regs_mask & (1 << SP_REGNUM))
14589 /* Note - write back to the stack register is not enabled
14590 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14591 in the list of registers and if we add writeback the
14592 instruction becomes UNPREDICTABLE. */
14593 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14595 else if (TARGET_ARM)
14596 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14599 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14602 if (crtl->args.pretend_args_size)
14604 /* Unwind the pre-pushed regs. */
14605 operands[0] = operands[1] = stack_pointer_rtx;
14606 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14607 output_add_immediate (operands);
14611 /* We may have already restored PC directly from the stack. */
14612 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14615 /* Stack adjustment for exception handler. */
14616 if (crtl->calls_eh_return)
14617 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14618 ARM_EH_STACKADJ_REGNUM);
14620 /* Generate the return instruction. */
14621 switch ((int) ARM_FUNC_TYPE (func_type))
14625 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14628 case ARM_FT_EXCEPTION:
14629 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14632 case ARM_FT_INTERWORKED:
14633 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14637 if (IS_STACKALIGN (func_type))
14639 /* See comment in arm_expand_prologue. */
14640 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14642 if (arm_arch5 || arm_arch4t)
14643 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14645 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14653 static void arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14654 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14656 arm_stack_offsets *offsets;
14662 /* Emit any call-via-reg trampolines that are needed for v4t support
14663 of call_reg and call_value_reg type insns. */
14664 for (regno = 0; regno < LR_REGNUM; regno++)
14666 rtx label = cfun->machine->call_via[regno];
14670 switch_to_section (function_section (current_function_decl));
14671 targetm.asm_out.internal_label (asm_out_file, "L",
14672 CODE_LABEL_NUMBER (label));
14673 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14677 /* ??? Probably not safe to set this here, since it assumes that a
14678 function will be emitted as assembly immediately after we generate
14679 RTL for it. This does not happen for inline functions. */
14680 cfun->machine->return_used_this_function = 0;
14682 else /* TARGET_32BIT */
14684 /* We need to take into account any stack-frame rounding. */
14685 offsets = arm_get_frame_offsets ();
14687 gcc_assert (!use_return_insn (FALSE, NULL)
14688 || (cfun->machine->return_used_this_function != 0)
14689 || offsets->saved_regs == offsets->outgoing_args
14690 || frame_pointer_needed);
14692 /* Reset the ARM-specific per-function variables. */
14693 after_arm_reorg = 0;
14697 /* Generate and emit an insn that we will recognize as a push_multi.
14698 Unfortunately, since this insn does not reflect very well the actual
14699 semantics of the operation, we need to annotate the insn for the benefit
14700 of DWARF2 frame unwind information. */
14702 static rtx emit_multi_reg_push (unsigned long mask)
14705 int num_dwarf_regs;
14709 int dwarf_par_index;
14712 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14713 if (mask & (1 << i))
14716 gcc_assert (num_regs && num_regs <= 16);
14718 /* We don't record the PC in the dwarf frame information. */
14719 num_dwarf_regs = num_regs;
14720 if (mask & (1 << PC_REGNUM))
14723 /* For the body of the insn we are going to generate an UNSPEC in
14724 parallel with several USEs. This allows the insn to be recognized
14725 by the push_multi pattern in the arm.md file.
14727 The body of the insn looks something like this:
14729 (parallel [
14730 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14731 (const_int:SI <num>)))
14732 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14733 (use (reg:SI XX))
14734 (use (reg:SI YY))
14735 ...
14736 ])
14738 For the frame note however, we try to be more explicit and actually
14739 show each register being stored into the stack frame, plus a (single)
14740 decrement of the stack pointer. We do it this way in order to be
14741 friendly to the stack unwinding code, which only wants to see a single
14742 stack decrement per instruction. The RTL we generate for the note looks
14743 something like this:
14745 (sequence [
14746 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14747 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14748 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14749 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14750 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI ZZ))
14751 ])
14753 FIXME: In an ideal world the PRE_MODIFY would not exist and
14754 instead we'd have a parallel expression detailing all
14755 the stores to the various memory addresses so that debug
14756 information is more up-to-date. Remember however while writing
14757 this to take care of the constraints with the push instruction.
14759 Note also that this has to be taken care of for the VFP registers.
14761 For more see PR43399. */
14763 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14764 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14765 dwarf_par_index = 1;
14767 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14769 if (mask & (1 << i))
14771 reg = gen_rtx_REG (SImode, i);
14773 XVECEXP (par, 0, 0)
14774 = gen_rtx_SET (VOIDmode,
14777 gen_rtx_PRE_MODIFY (Pmode,
14780 (stack_pointer_rtx,
14783 gen_rtx_UNSPEC (BLKmode,
14784 gen_rtvec (1, reg),
14785 UNSPEC_PUSH_MULT));
14787 if (i != PC_REGNUM)
14789 tmp = gen_rtx_SET (VOIDmode,
14790 gen_frame_mem (SImode, stack_pointer_rtx),
14792 RTX_FRAME_RELATED_P (tmp) = 1;
14793 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14801 for (j = 1, i++; j < num_regs; i++)
14803 if (mask & (1 << i))
14805 reg = gen_rtx_REG (SImode, i);
14807 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14809 if (i != PC_REGNUM)
14812 = gen_rtx_SET (VOIDmode,
14815 plus_constant (stack_pointer_rtx,
14818 RTX_FRAME_RELATED_P (tmp) = 1;
14819 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14826 par = emit_insn (par);
14828 tmp = gen_rtx_SET (VOIDmode,
14830 plus_constant (stack_pointer_rtx, -4 * num_regs));
14831 RTX_FRAME_RELATED_P (tmp) = 1;
14832 XVECEXP (dwarf, 0, 0) = tmp;
14834 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
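/* For illustration: for MASK covering {r4, r5, lr} the frame note
   built above looks roughly like

     (sequence [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
		(set (mem:SI (reg:SI sp)) (reg:SI r4))
		(set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
		(set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))])

   i.e. a single stack decrement plus one store per saved register.  */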
14839 /* Calculate the size of the return value that is passed in registers. */
14841 static unsigned arm_size_return_regs (void)
14843 enum machine_mode mode;
14845 if (crtl->return_rtx != 0)
14846 mode = GET_MODE (crtl->return_rtx);
14848 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14850 return GET_MODE_SIZE (mode);
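/* For illustration: a function returning "long long" has a DImode
   return rtx, so this reports GET_MODE_SIZE (DImode) == 8 bytes --
   the value comes back in r0 and r1.  */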
14854 static rtx emit_sfm (int base_reg, int count)
14861 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14862 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14864 reg = gen_rtx_REG (XFmode, base_reg++);
14866 XVECEXP (par, 0, 0)
14867 = gen_rtx_SET (VOIDmode,
14870 gen_rtx_PRE_MODIFY (Pmode,
14873 (stack_pointer_rtx,
14876 gen_rtx_UNSPEC (BLKmode,
14877 gen_rtvec (1, reg),
14878 UNSPEC_PUSH_MULT));
14879 tmp = gen_rtx_SET (VOIDmode,
14880 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14881 RTX_FRAME_RELATED_P (tmp) = 1;
14882 XVECEXP (dwarf, 0, 1) = tmp;
14884 for (i = 1; i < count; i++)
14886 reg = gen_rtx_REG (XFmode, base_reg++);
14887 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14889 tmp = gen_rtx_SET (VOIDmode,
14890 gen_frame_mem (XFmode,
14891 plus_constant (stack_pointer_rtx,
14894 RTX_FRAME_RELATED_P (tmp) = 1;
14895 XVECEXP (dwarf, 0, i + 1) = tmp;
14898 tmp = gen_rtx_SET (VOIDmode,
14900 plus_constant (stack_pointer_rtx, -12 * count));
14902 RTX_FRAME_RELATED_P (tmp) = 1;
14903 XVECEXP (dwarf, 0, 0) = tmp;
14905 par = emit_insn (par);
14906 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14912 /* Return true if the current function needs to save/restore LR. */
14915 static bool thumb_force_lr_save (void)
14917 return !cfun->machine->lr_save_eliminated
14918 && (!leaf_function_p ()
14919 || thumb_far_jump_used_p ()
14920 || df_regs_ever_live_p (LR_REGNUM));
14924 /* Compute the distance from register FROM to register TO.
14925 These can be the arg pointer (26), the soft frame pointer (25),
14926 the stack pointer (13) or the hard frame pointer (11).
14927 In thumb mode r7 is used as the soft frame pointer, if needed.
14928 Typical stack layout looks like this:
14930 old stack pointer -> |    |
                            ----
                           |    | \
                           |    |   saved arguments for
                           |    |   vararg functions
                           |    | /
                             --
  hard FP & arg pointer -> |    | \
                           |    |   stack
                           |    |   frame
                           |    | /
                             --
                           |    | \
                           |    |   call saved
                           |    |   registers
     soft frame pointer -> |    | /
                             --
                           |    | \
                           |    |   local
                           |    |   variables
    locals base pointer -> |    | /
                             --
                           |    | \
                           |    |   outgoing
                           |    |   arguments
  current stack pointer -> |    | /
                             --
14958 For a given function some or all of these stack components
14959 may not be needed, giving rise to the possibility of
14960 eliminating some of the registers.
14962 The values returned by this function must reflect the behavior
14963 of arm_expand_prologue() and arm_compute_save_reg_mask().
14965 The sign of the number returned reflects the direction of stack
14966 growth, so the values are positive for all eliminations except
14967 from the soft frame pointer to the hard frame pointer.
14969 SFP may point just inside the local variables block to ensure correct alignment. */
14973 /* Calculate stack offsets. These are used to calculate register elimination
14974 offsets and in prologue/epilogue code. Also calculates which registers
14975 should be saved. */
14977 static arm_stack_offsets *
14978 arm_get_frame_offsets (void)
14980 struct arm_stack_offsets *offsets;
14981 unsigned long func_type;
14985 HOST_WIDE_INT frame_size;
14988 offsets = &cfun->machine->stack_offsets;
14990 /* We need to know if we are a leaf function. Unfortunately, it
14991 is possible to be called after start_sequence has been called,
14992 which causes get_insns to return the insns for the sequence,
14993 not the function, which will cause leaf_function_p to return
14994 the incorrect result.
14996 However, we only need to know about leaf functions once reload has completed, and the
14997 frame size cannot be changed after that time, so we can safely
14998 use the cached value. */
15000 if (reload_completed)
15003 /* Initially this is the size of the local variables. It will be translated
15004 into an offset once we have determined the size of preceding data. */
15005 frame_size = ROUND_UP_WORD (get_frame_size ());
15007 leaf = leaf_function_p ();
15009 /* Space for variadic functions. */
15010 offsets->saved_args = crtl->args.pretend_args_size;
15012 /* In Thumb mode this is incorrect, but never used. */
15013 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0)
15014 + arm_compute_static_chain_stack_bytes ();
15018 unsigned int regno;
15020 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15021 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15022 saved = core_saved;
15024 /* We know that SP will be doubleword aligned on entry, and we must
15025 preserve that condition at any subroutine call. We also require the
15026 soft frame pointer to be doubleword aligned. */
15028 if (TARGET_REALLY_IWMMXT)
15030 /* Check for the call-saved iWMMXt registers. */
15031 for (regno = FIRST_IWMMXT_REGNUM;
15032 regno <= LAST_IWMMXT_REGNUM;
15034 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15038 func_type = arm_current_func_type ();
15039 if (! IS_VOLATILE (func_type))
15041 /* Space for saved FPA registers. */
15042 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15043 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15046 /* Space for saved VFP registers. */
15047 if (TARGET_HARD_FLOAT && TARGET_VFP)
15048 saved += arm_get_vfp_saved_size ();
15051 else /* TARGET_THUMB1 */
15053 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15054 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15055 saved = core_saved;
15056 if (TARGET_BACKTRACE)
15060 /* Saved registers include the stack frame. */
15061 offsets->saved_regs = offsets->saved_args + saved
15062 + arm_compute_static_chain_stack_bytes ();
15063 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15064 /* A leaf function does not need any stack alignment if it has nothing on the stack. */
15066 if (leaf && frame_size == 0)
15068 offsets->outgoing_args = offsets->soft_frame;
15069 offsets->locals_base = offsets->soft_frame;
15073 /* Ensure SFP has the correct alignment. */
15074 if (ARM_DOUBLEWORD_ALIGN
15075 && (offsets->soft_frame & 7))
15077 offsets->soft_frame += 4;
15078 /* Try to align stack by pushing an extra reg. Don't bother doing this
15079 when there is a stack frame as the alignment will be rolled into
15080 the normal stack adjustment. */
15081 if (frame_size + crtl->outgoing_args_size == 0)
15085 /* If it is safe to use r3, then do so. This sometimes
15086 generates better code on Thumb-2 by avoiding the need to
15087 use 32-bit push/pop instructions. */
15088 if (!crtl->tail_call_emit
15089 && arm_size_return_regs () <= 12
15090 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15095 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15097 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15106 offsets->saved_regs += 4;
15107 offsets->saved_regs_mask |= (1 << reg);
15112 offsets->locals_base = offsets->soft_frame + frame_size;
15113 offsets->outgoing_args = (offsets->locals_base
15114 + crtl->outgoing_args_size);
15116 if (ARM_DOUBLEWORD_ALIGN)
15118 /* Ensure SP remains doubleword aligned. */
15119 if (offsets->outgoing_args & 7)
15120 offsets->outgoing_args += 4;
15121 gcc_assert (!(offsets->outgoing_args & 7));
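/* For illustration (assuming no interworking slot and no iWMMXt or
   coprocessor saves): an ARM-mode non-leaf function with no pretend
   args that saves {r4, r5, r6, lr} and has 8 bytes of locals and no
   outgoing arguments ends up with

     saved_args    = 0
     saved_regs    = 16
     soft_frame    = 16
     locals_base   = 24
     outgoing_args = 24

   which already satisfies the doubleword-alignment check above.  */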
15128 /* Calculate the relative offsets for the different stack pointers. Positive
15129 offsets are in the direction of stack growth. */
15132 unsigned int arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15134 arm_stack_offsets *offsets;
15136 offsets = arm_get_frame_offsets ();
15138 /* OK, now we have enough information to compute the distances.
15139 There must be an entry in these switch tables for each pair
15140 of registers in ELIMINABLE_REGS, even if some of the entries
15141 seem to be redundant or useless. */
15144 case ARG_POINTER_REGNUM:
15147 case THUMB_HARD_FRAME_POINTER_REGNUM:
15150 case FRAME_POINTER_REGNUM:
15151 /* This is the reverse of the soft frame pointer
15152 to hard frame pointer elimination below. */
15153 return offsets->soft_frame - offsets->saved_args;
15155 case ARM_HARD_FRAME_POINTER_REGNUM:
15156 /* This is only non-zero in the case where the static chain register
15157 is stored above the frame. */
15158 return offsets->frame - offsets->saved_args - 4;
15160 case STACK_POINTER_REGNUM:
15161 /* If nothing has been pushed on the stack at all
15162 then this will return -4. This *is* correct! */
15163 return offsets->outgoing_args - (offsets->saved_args + 4);
15166 gcc_unreachable ();
15168 gcc_unreachable ();
15170 case FRAME_POINTER_REGNUM:
15173 case THUMB_HARD_FRAME_POINTER_REGNUM:
15176 case ARM_HARD_FRAME_POINTER_REGNUM:
15177 /* The hard frame pointer points to the top entry in the
15178 stack frame. The soft frame pointer to the bottom entry
15179 in the stack frame. If there is no stack frame at all,
15180 then they are identical. */
15182 return offsets->frame - offsets->soft_frame;
15184 case STACK_POINTER_REGNUM:
15185 return offsets->outgoing_args - offsets->soft_frame;
15188 gcc_unreachable ();
15190 gcc_unreachable ();
15193 /* You cannot eliminate from the stack pointer.
15194 In theory you could eliminate from the hard frame
15195 pointer to the stack pointer, but this will never
15196 happen, since if a stack frame is not needed the
15197 hard frame pointer will never be used. */
15198 gcc_unreachable ();
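/* For illustration: with the example layout sketched after
   arm_get_frame_offsets above (saved_args == 0, outgoing_args == 24),
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   24 - (0 + 4) == 20.  */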
15202 /* Given FROM and TO register numbers, say whether this elimination is
15203 allowed. Frame pointer elimination is automatically handled.
15205 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15206 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15207 pointer, we must eliminate FRAME_POINTER_REGNUM into
15208 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15209 ARG_POINTER_REGNUM. */
15212 static bool arm_can_eliminate (const int from, const int to)
15214 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15215 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15216 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15217 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false : true);
15221 /* Emit RTL to save coprocessor registers on function entry. Returns the
15222 number of bytes pushed. */
15225 static int arm_save_coproc_regs (void)
15227 int saved_size = 0;
15229 unsigned start_reg;
15232 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15233 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15235 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15236 insn = gen_rtx_MEM (V2SImode, insn);
15237 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15238 RTX_FRAME_RELATED_P (insn) = 1;
15242 /* Save any floating point call-saved registers used by this function. */
15244 if (TARGET_FPA_EMU2)
15246 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15247 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15249 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15250 insn = gen_rtx_MEM (XFmode, insn);
15251 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15252 RTX_FRAME_RELATED_P (insn) = 1;
15258 start_reg = LAST_FPA_REGNUM;
15260 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15262 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15264 if (start_reg - reg == 3)
15266 insn = emit_sfm (reg, 4);
15267 RTX_FRAME_RELATED_P (insn) = 1;
15269 start_reg = reg - 1;
15274 if (start_reg != reg)
15276 insn = emit_sfm (reg + 1, start_reg - reg);
15277 RTX_FRAME_RELATED_P (insn) = 1;
15278 saved_size += (start_reg - reg) * 12;
15280 start_reg = reg - 1;
15284 if (start_reg != reg)
15286 insn = emit_sfm (reg + 1, start_reg - reg);
15287 saved_size += (start_reg - reg) * 12;
15288 RTX_FRAME_RELATED_P (insn) = 1;
15291 if (TARGET_HARD_FLOAT && TARGET_VFP)
15293 start_reg = FIRST_VFP_REGNUM;
15295 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15297 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15298 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15300 if (start_reg != reg)
15301 saved_size += vfp_emit_fstmd (start_reg,
15302 (reg - start_reg) / 2);
15303 start_reg = reg + 2;
15306 if (start_reg != reg)
15307 saved_size += vfp_emit_fstmd (start_reg,
15308 (reg - start_reg) / 2);
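/* For illustration: if d8 and d9 are the only live call-saved VFP
   registers, the scan above finds one contiguous run and emits a
   single fstmd covering both (16 bytes; pre-v6 cores widen a run of
   exactly two registers to three under the ARM10 VFPr1
   workaround).  */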
15314 /* Set the Thumb frame pointer from the stack pointer. */
15317 static void thumb_set_frame_pointer (arm_stack_offsets *offsets)
15319 HOST_WIDE_INT amount;
15322 amount = offsets->outgoing_args - offsets->locals_base;
15324 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15325 stack_pointer_rtx, GEN_INT (amount)));
15328 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15329 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15330 expects the first two operands to be the same. */
15333 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15335 hard_frame_pointer_rtx));
15339 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15340 hard_frame_pointer_rtx,
15341 stack_pointer_rtx));
15343 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15344 plus_constant (stack_pointer_rtx, amount));
15345 RTX_FRAME_RELATED_P (dwarf) = 1;
15346 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15349 RTX_FRAME_RELATED_P (insn) = 1;
15352 /* Generate the prologue instructions for entry into an ARM or Thumb-2 function. */
15355 void arm_expand_prologue (void)
15360 unsigned long live_regs_mask;
15361 unsigned long func_type;
15363 int saved_pretend_args = 0;
15364 int saved_regs = 0;
15365 unsigned HOST_WIDE_INT args_to_push;
15366 arm_stack_offsets *offsets;
15368 func_type = arm_current_func_type ();
15370 /* Naked functions don't have prologues. */
15371 if (IS_NAKED (func_type))
15374 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15375 args_to_push = crtl->args.pretend_args_size;
15377 /* Compute which register we will have to save onto the stack. */
15378 offsets = arm_get_frame_offsets ();
15379 live_regs_mask = offsets->saved_regs_mask;
15381 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15383 if (IS_STACKALIGN (func_type))
15388 /* Handle a word-aligned stack pointer. We generate the following:
15390 mov r0, sp
15391 bic r1, r0, #7
15392 mov sp, r1
15393 <save and restore r0 in normal prologue/epilogue>
15394 mov sp, r0
15395 bx lr
15397 The unwinder doesn't need to know about the stack realignment.
15398 Just tell it we saved SP in r0. */
15399 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15401 r0 = gen_rtx_REG (SImode, 0);
15402 r1 = gen_rtx_REG (SImode, 1);
15403 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15404 compiler won't choke. */
15405 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15406 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15407 insn = gen_movsi (r0, stack_pointer_rtx);
15408 RTX_FRAME_RELATED_P (insn) = 1;
15409 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15411 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15412 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15415 /* For APCS frames, if the IP register is clobbered
15416 when creating the frame, save that register in a special way. */
15418 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15420 if (IS_INTERRUPT (func_type))
15422 /* Interrupt functions must not corrupt any registers.
15423 Creating a frame pointer however, corrupts the IP
15424 register, so we must push it first. */
15425 insn = emit_multi_reg_push (1 << IP_REGNUM);
15427 /* Do not set RTX_FRAME_RELATED_P on this insn.
15428 The dwarf stack unwinding code only wants to see one
15429 stack decrement per function, and this is not it. If
15430 this instruction is labeled as being part of the frame
15431 creation sequence then dwarf2out_frame_debug_expr will
15432 die when it encounters the assignment of IP to FP
15433 later on, since the use of SP here establishes SP as
15434 the CFA register and not IP.
15436 Anyway this instruction is not really part of the stack
15437 frame creation although it is part of the prologue. */
15439 else if (IS_NESTED (func_type))
15441 /* The static chain register is the same as the IP register
15442 used as a scratch register during stack frame creation.
15443 To get around this, we need to find somewhere to store IP
15444 whilst the frame is being created. We try the following
15447 1. The last argument register.
15448 2. A slot on the stack above the frame. (This only
15449 works if the function is not a varargs function).
15450 3. Register r3, after pushing the argument registers onto the stack.
15453 Note - we only need to tell the dwarf2 backend about the SP
15454 adjustment in the second variant; the static chain register
15455 doesn't need to be unwound, as it doesn't contain a value
15456 inherited from the caller. */
15458 if (df_regs_ever_live_p (3) == false)
15459 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15460 else if (args_to_push == 0)
15464 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15467 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15468 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15471 /* Just tell the dwarf backend that we adjusted SP. */
15472 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15473 plus_constant (stack_pointer_rtx,
15475 RTX_FRAME_RELATED_P (insn) = 1;
15476 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15480 /* Store the args on the stack. */
15481 if (cfun->machine->uses_anonymous_args)
15482 insn = emit_multi_reg_push
15483 ((0xf0 >> (args_to_push / 4)) & 0xf);
15486 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15487 GEN_INT (- args_to_push)));
15489 RTX_FRAME_RELATED_P (insn) = 1;
15491 saved_pretend_args = 1;
15492 fp_offset = args_to_push;
15495 /* Now reuse r3 to preserve IP. */
15496 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15500 insn = emit_set_insn (ip_rtx,
15501 plus_constant (stack_pointer_rtx, fp_offset));
15502 RTX_FRAME_RELATED_P (insn) = 1;
15507 /* Push the argument registers, or reserve space for them. */
15508 if (cfun->machine->uses_anonymous_args)
15509 insn = emit_multi_reg_push
15510 ((0xf0 >> (args_to_push / 4)) & 0xf);
15513 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15514 GEN_INT (- args_to_push)));
15515 RTX_FRAME_RELATED_P (insn) = 1;
15518 /* If this is an interrupt service routine, and the link register
15519 is going to be pushed, and we're not generating the extra
15520 push of IP (needed when a frame is needed and the frame layout is APCS),
15521 subtracting four from LR now will mean that the function return
15522 can be done with a single instruction. */
15523 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15524 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15525 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15528 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15530 emit_set_insn (lr, plus_constant (lr, -4));
15533 if (live_regs_mask)
15535 saved_regs += bit_count (live_regs_mask) * 4;
15536 if (optimize_size && !frame_pointer_needed
15537 && saved_regs == offsets->saved_regs - offsets->saved_args)
15539 /* If no coprocessor registers are being pushed and we don't have
15540 to worry about a frame pointer then push extra registers to
15541 create the stack frame. This is done in a way that does not
15542 alter the frame layout, so is independent of the epilogue. */
15546 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15548 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15549 if (frame && n * 4 >= frame)
15552 live_regs_mask |= (1 << n) - 1;
15553 saved_regs += frame;
15556 insn = emit_multi_reg_push (live_regs_mask);
15557 RTX_FRAME_RELATED_P (insn) = 1;
15560 if (! IS_VOLATILE (func_type))
15561 saved_regs += arm_save_coproc_regs ();
15563 if (frame_pointer_needed && TARGET_ARM)
15565 /* Create the new frame pointer. */
15566 if (TARGET_APCS_FRAME)
15568 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15569 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15570 RTX_FRAME_RELATED_P (insn) = 1;
15572 if (IS_NESTED (func_type))
15574 /* Recover the static chain register. */
15575 if (!df_regs_ever_live_p (3)
15576 || saved_pretend_args)
15577 insn = gen_rtx_REG (SImode, 3);
15578 else /* if (crtl->args.pretend_args_size == 0) */
15580 insn = plus_constant (hard_frame_pointer_rtx, 4);
15581 insn = gen_frame_mem (SImode, insn);
15583 emit_set_insn (ip_rtx, insn);
15584 /* Add a USE to stop propagate_one_insn() from barfing. */
15585 emit_insn (gen_prologue_use (ip_rtx));
15590 insn = GEN_INT (saved_regs - 4);
15591 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15592 stack_pointer_rtx, insn));
15593 RTX_FRAME_RELATED_P (insn) = 1;
15597 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15599 /* This add can produce multiple insns for a large constant, so we
15600 need to get tricky. */
15601 rtx last = get_last_insn ();
15603 amount = GEN_INT (offsets->saved_args + saved_regs
15604 - offsets->outgoing_args);
15606 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15610 last = last ? NEXT_INSN (last) : get_insns ();
15611 RTX_FRAME_RELATED_P (last) = 1;
15613 while (last != insn);
15615 /* If the frame pointer is needed, emit a special barrier that
15616 will prevent the scheduler from moving stores to the frame
15617 before the stack adjustment. */
15618 if (frame_pointer_needed)
15619 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15620 hard_frame_pointer_rtx));
15624 if (frame_pointer_needed && TARGET_THUMB2)
15625 thumb_set_frame_pointer (offsets);
15627 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15629 unsigned long mask;
15631 mask = live_regs_mask;
15632 mask &= THUMB2_WORK_REGS;
15633 if (!IS_NESTED (func_type))
15634 mask |= (1 << IP_REGNUM);
15635 arm_load_pic_register (mask);
15638 /* If we are profiling, make sure no instructions are scheduled before
15639 the call to mcount. Similarly if the user has requested no
15640 scheduling in the prologue. Similarly if we want non-call exceptions
15641 using the EABI unwinder, to prevent faulting instructions from being
15642 swapped with a stack adjustment. */
15643 if (crtl->profile || !TARGET_SCHED_PROLOG
15644 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
15645 emit_insn (gen_blockage ());
15647 /* If the link register is being kept alive, with the return address in it,
15648 then make sure that it does not get reused by the ce2 pass. */
15649 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15650 cfun->machine->lr_save_eliminated = 1;
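/* For illustration: for an ARM-mode APCS frame the code above
   typically produces a prologue of the shape

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

   followed by any further stack adjustment for locals and outgoing
   arguments.  */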
15653 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15655 static void arm_print_condition (FILE *stream)
15657 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15659 /* Branch conversion is not implemented for Thumb-2. */
15662 output_operand_lossage ("predicated Thumb instruction");
15665 if (current_insn_predicate != NULL)
15667 output_operand_lossage
15668 ("predicated instruction in conditional sequence");
15672 fputs (arm_condition_codes[arm_current_cc], stream);
15674 else if (current_insn_predicate)
15676 enum arm_cond_code code;
15680 output_operand_lossage ("predicated Thumb instruction");
15684 code = get_arm_condition_code (current_insn_predicate);
15685 fputs (arm_condition_codes[code], stream);
15690 /* If CODE is 'd', then the X is a condition operand and the instruction
15691 should only be executed if the condition is true.
15692 if CODE is 'D', then the X is a condition operand and the instruction
15693 should only be executed if the condition is false: however, if the mode
15694 of the comparison is CCFPEmode, then always execute the instruction -- we
15695 do this because in these circumstances !GE does not necessarily imply LT;
15696 in these cases the instruction pattern will take care to make sure that
15697 an instruction containing %d will follow, thereby undoing the effects of
15698 doing this instruction unconditionally.
15699 If CODE is 'N' then X is a floating point operand that must be negated before output.
15701 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15702 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15704 static void arm_print_operand (FILE *stream, rtx x, int code)
15709 fputs (ASM_COMMENT_START, stream);
15713 fputs (user_label_prefix, stream);
15717 fputs (REGISTER_PREFIX, stream);
15721 arm_print_condition (stream);
15725 /* Nothing in unified syntax, otherwise the current condition code. */
15726 if (!TARGET_UNIFIED_ASM)
15727 arm_print_condition (stream);
15731 /* The current condition code in unified syntax, otherwise nothing. */
15732 if (TARGET_UNIFIED_ASM)
15733 arm_print_condition (stream);
15737 /* The current condition code for a condition code setting instruction.
15738 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15739 if (TARGET_UNIFIED_ASM)
15741 fputc ('s', stream);
15742 arm_print_condition (stream);
15746 arm_print_condition (stream);
15747 fputc ('s', stream);
15752 /* If the instruction is conditionally executed then print
15753 the current condition code, otherwise print 's'. */
15754 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15755 if (current_insn_predicate)
15756 arm_print_condition (stream);
15758 fputc ('s', stream);
15761 /* %# is a "break" sequence. It doesn't output anything, but is used to
15762 separate e.g. operand numbers from following text, if that text consists
15763 of further digits which we don't want to be part of the operand
15764 number. */
15771 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15772 r = real_value_negate (&r);
15773 fprintf (stream, "%s", fp_const_from_val (&r));
15777 /* An integer or symbol address without a preceding # sign. */
15779 switch (GET_CODE (x))
15782 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15786 output_addr_const (stream, x);
15790 gcc_unreachable ();
15795 if (GET_CODE (x) == CONST_INT)
15798 val = ARM_SIGN_EXTEND (~INTVAL (x));
15799 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15803 putc ('~', stream);
15804 output_addr_const (stream, x);
15809 /* The low 16 bits of an immediate constant. */
15810 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15814 fprintf (stream, "%s", arithmetic_instr (x, 1));
15817 /* Truncate Cirrus shift counts. */
15819 if (GET_CODE (x) == CONST_INT)
15821 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15824 arm_print_operand (stream, x, 0);
15828 fprintf (stream, "%s", arithmetic_instr (x, 0));
15836 if (!shift_operator (x, SImode))
15838 output_operand_lossage ("invalid shift operand");
15842 shift = shift_op (x, &val);
15846 fprintf (stream, ", %s ", shift);
15848 arm_print_operand (stream, XEXP (x, 1), 0);
15850 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15855 /* An explanation of the 'Q', 'R' and 'H' register operands:
15857 In a pair of registers containing a DI or DF value the 'Q'
15858 operand returns the register number of the register containing
15859 the least significant part of the value. The 'R' operand returns
15860 the register number of the register containing the most
15861 significant part of the value.
15863 The 'H' operand returns the higher of the two register numbers.
15864 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15865 same as the 'Q' operand, since the most significant part of the
15866 value is held in the lower number register. The reverse is true
15867 on systems where WORDS_BIG_ENDIAN is false.
15869 The purpose of these operands is to distinguish between cases
15870 where the endian-ness of the values is important (for example
15871 when they are added together), and cases where the endian-ness
15872 is irrelevant, but the order of register operations is important.
15873 For example when loading a value from memory into a register
15874 pair, the endian-ness does not matter. Provided that the value
15875 from the lower memory address is put into the lower numbered
15876 register, and the value from the higher address is put into the
15877 higher numbered register, the load will work regardless of whether
15878 the value being loaded is big-wordian or little-wordian. The
15879 order of the two register loads can matter however, if the address
15880 of the memory location is actually held in one of the registers
15881 being overwritten by the load.
15883 The 'Q' and 'R' constraints are also available for 64-bit
15884 values. */
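/* Illustrative example (not from the original source): for a DImode
   value held in {r0, r1} with WORDS_BIG_ENDIAN false, '%Q' prints
   "r0" (the least significant word), '%R' prints "r1" (the most
   significant word) and '%H' prints "r1". With WORDS_BIG_ENDIAN
   true, '%Q' prints "r1", '%R' prints "r0" and '%H' still prints
   "r1". */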
15886 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15888 rtx part = gen_lowpart (SImode, x);
15889 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15893 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15895 output_operand_lossage ("invalid operand for code '%c'", code);
15899 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15903 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
15905 enum machine_mode mode = GET_MODE (x);
15908 if (mode == VOIDmode)
15909 mode = DImode;
15910 part = gen_highpart_mode (SImode, mode, x);
15911 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
15915 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15917 output_operand_lossage ("invalid operand for code '%c'", code);
15921 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15925 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15927 output_operand_lossage ("invalid operand for code '%c'", code);
15931 asm_fprintf (stream, "%r", REGNO (x) + 1);
15935 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15937 output_operand_lossage ("invalid operand for code '%c'", code);
15941 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15945 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15947 output_operand_lossage ("invalid operand for code '%c'", code);
15951 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15955 asm_fprintf (stream, "%r",
15956 GET_CODE (XEXP (x, 0)) == REG
15957 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15961 asm_fprintf (stream, "{%r-%r}",
15963 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15966 /* Like 'M', but writing doubleword vector registers, for use by Neon
15967 insns. */
15968 case 'h':
15969 {
15970 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15971 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15972 if (numregs == 1)
15973 asm_fprintf (stream, "{d%d}", regno);
15974 else
15975 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15980 /* CONST_TRUE_RTX means always -- that's the default. */
15981 if (x == const_true_rtx)
15982 return;
15984 if (!COMPARISON_P (x))
15986 output_operand_lossage ("invalid operand for code '%c'", code);
15990 fputs (arm_condition_codes[get_arm_condition_code (x)],
15995 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15996 want to do that. */
15997 if (x == const_true_rtx)
15999 output_operand_lossage ("instruction never executed");
16002 if (!COMPARISON_P (x))
16004 output_operand_lossage ("invalid operand for code '%c'", code);
16008 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16009 (get_arm_condition_code (x))],
16013 /* Cirrus registers can be accessed in a variety of ways:
16014 single floating point (f)
16015 double floating point (d)
16016 32bit integer (fx)
16017 64bit integer (dx). */
16018 case 'W': /* Cirrus register in F mode. */
16019 case 'X': /* Cirrus register in D mode. */
16020 case 'Y': /* Cirrus register in FX mode. */
16021 case 'Z': /* Cirrus register in DX mode. */
16022 gcc_assert (GET_CODE (x) == REG
16023 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16025 fprintf (stream, "mv%s%s",
16027 : code == 'X' ? "d"
16028 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16032 /* Print cirrus register in the mode specified by the register's mode. */
16035 int mode = GET_MODE (x);
16037 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16039 output_operand_lossage ("invalid operand for code '%c'", code);
16043 fprintf (stream, "mv%s%s",
16044 mode == DFmode ? "d"
16045 : mode == SImode ? "fx"
16046 : mode == DImode ? "dx"
16047 : "f", reg_names[REGNO (x)] + 2);
16053 if (GET_CODE (x) != REG
16054 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16055 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16056 /* Bad value for wCG register number. */
16058 output_operand_lossage ("invalid operand for code '%c'", code);
16063 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16066 /* Print an iWMMXt control register name. */
16068 if (GET_CODE (x) != CONST_INT
16070 || INTVAL (x) >= 16)
16071 /* Bad value for wC register number. */
16073 output_operand_lossage ("invalid operand for code '%c'", code);
16079 static const char * wc_reg_names [16] =
16081 "wCID", "wCon", "wCSSF", "wCASF",
16082 "wC4", "wC5", "wC6", "wC7",
16083 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16084 "wC12", "wC13", "wC14", "wC15"
16087 fputs (wc_reg_names [INTVAL (x)], stream);
16091 /* Print the high single-precision register of a VFP double-precision
16092 register. */
16093 case 'p':
16094 {
16095 int mode = GET_MODE (x);
16098 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16100 output_operand_lossage ("invalid operand for code '%c'", code);
16105 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16107 output_operand_lossage ("invalid operand for code '%c'", code);
16111 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16115 /* Print a VFP/Neon double precision or quad precision register name. */
16119 int mode = GET_MODE (x);
16120 int is_quad = (code == 'q');
16123 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16125 output_operand_lossage ("invalid operand for code '%c'", code);
16129 if (GET_CODE (x) != REG
16130 || !IS_VFP_REGNUM (REGNO (x)))
16132 output_operand_lossage ("invalid operand for code '%c'", code);
16137 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16138 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16140 output_operand_lossage ("invalid operand for code '%c'", code);
16144 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16145 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16149 /* These two codes print the low/high doubleword register of a Neon quad
16150 register, respectively. For pair-structure types, they can also print
16151 low/high quadword registers. */
16152 case 'e':
16153 case 'f':
16154 {
16155 int mode = GET_MODE (x);
16158 if ((GET_MODE_SIZE (mode) != 16
16159 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16161 output_operand_lossage ("invalid operand for code '%c'", code);
16166 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16168 output_operand_lossage ("invalid operand for code '%c'", code);
16172 if (GET_MODE_SIZE (mode) == 16)
16173 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16174 + (code == 'f' ? 1 : 0));
16176 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16177 + (code == 'f' ? 1 : 0));
16181 /* Print a VFPv3 floating-point constant, represented as an integer
16182 index. */
16183 case 'G':
16184 {
16185 int index = vfp3_const_double_index (x);
16186 gcc_assert (index != -1);
16187 fprintf (stream, "%d", index);
16191 /* Print bits representing opcode features for Neon.
16193 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16194 and polynomials as unsigned.
16196 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16198 Bit 2 is 1 for rounding functions, 0 otherwise. */
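/* Illustrative example (not from the original source): with
   bits == 1 (signed integer) 'T' prints 's' and 'F' prints 'i';
   with bits == 2 (polynomial) 'T' prints 'p' while 't' prints 'u';
   with bits == 6 (polynomial with rounding) 'O' prints "r". */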
16200 /* Identify the type as 's', 'u', 'p' or 'f'. */
16203 HOST_WIDE_INT bits = INTVAL (x);
16204 fputc ("uspf"[bits & 3], stream);
16208 /* Likewise, but signed and unsigned integers are both 'i'. */
16211 HOST_WIDE_INT bits = INTVAL (x);
16212 fputc ("iipf"[bits & 3], stream);
16216 /* As for 'T', but emit 'u' instead of 'p'. */
16219 HOST_WIDE_INT bits = INTVAL (x);
16220 fputc ("usuf"[bits & 3], stream);
16224 /* Bit 2: rounding (vs none). */
16227 HOST_WIDE_INT bits = INTVAL (x);
16228 fputs ((bits & 4) != 0 ? "r" : "", stream);
16232 /* Memory operand for vld1/vst1 instruction. */
16236 bool postinc = FALSE;
16237 gcc_assert (GET_CODE (x) == MEM);
16238 addr = XEXP (x, 0);
16239 if (GET_CODE (addr) == POST_INC)
16240 {
16241 postinc = 1;
16242 addr = XEXP (addr, 0);
16243 }
16244 asm_fprintf (stream, "[%r]", REGNO (addr));
16245 if (postinc)
16246 fputs ("!", stream);
16250 /* Translate an S register number into a D register number and element index. */
16253 int mode = GET_MODE (x);
16256 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16258 output_operand_lossage ("invalid operand for code '%c'", code);
16263 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16265 output_operand_lossage ("invalid operand for code '%c'", code);
16269 regno = regno - FIRST_VFP_REGNUM;
16270 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16274 /* Register specifier for vld1.16/vst1.16. Translate the S register
16275 number into a D register number and element index. */
16278 int mode = GET_MODE (x);
16281 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16283 output_operand_lossage ("invalid operand for code '%c'", code);
16288 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16290 output_operand_lossage ("invalid operand for code '%c'", code);
16294 regno = regno - FIRST_VFP_REGNUM;
16295 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16302 output_operand_lossage ("missing operand");
16306 switch (GET_CODE (x))
16309 asm_fprintf (stream, "%r", REGNO (x));
16313 output_memory_reference_mode = GET_MODE (x);
16314 output_address (XEXP (x, 0));
16321 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16322 sizeof (fpstr), 0, 1);
16323 fprintf (stream, "#%s", fpstr);
16326 fprintf (stream, "#%s", fp_immediate_constant (x));
16330 gcc_assert (GET_CODE (x) != NEG);
16331 fputc ('#', stream);
16332 if (GET_CODE (x) == HIGH)
16334 fputs (":lower16:", stream);
16338 output_addr_const (stream, x);
16344 /* Target hook for printing a memory address. */
16345 static void
16346 arm_print_operand_address (FILE *stream, rtx x)
16350 int is_minus = GET_CODE (x) == MINUS;
16352 if (GET_CODE (x) == REG)
16353 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16354 else if (GET_CODE (x) == PLUS || is_minus)
16356 rtx base = XEXP (x, 0);
16357 rtx index = XEXP (x, 1);
16358 HOST_WIDE_INT offset = 0;
16359 if (GET_CODE (base) != REG
16360 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16362 /* Ensure that BASE is a register. */
16363 /* (one of them must be). */
16364 /* Also ensure the SP is not used as an index register. */
16369 switch (GET_CODE (index))
16372 offset = INTVAL (index);
16375 asm_fprintf (stream, "[%r, #%wd]",
16376 REGNO (base), offset);
16380 asm_fprintf (stream, "[%r, %s%r]",
16381 REGNO (base), is_minus ? "-" : "",
16391 asm_fprintf (stream, "[%r, %s%r",
16392 REGNO (base), is_minus ? "-" : "",
16393 REGNO (XEXP (index, 0)));
16394 arm_print_operand (stream, index, 'S');
16395 fputs ("]", stream);
16400 gcc_unreachable ();
16403 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16404 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16406 extern enum machine_mode output_memory_reference_mode;
16408 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16410 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16411 asm_fprintf (stream, "[%r, #%s%d]!",
16412 REGNO (XEXP (x, 0)),
16413 GET_CODE (x) == PRE_DEC ? "-" : "",
16414 GET_MODE_SIZE (output_memory_reference_mode));
16416 asm_fprintf (stream, "[%r], #%s%d",
16417 REGNO (XEXP (x, 0)),
16418 GET_CODE (x) == POST_DEC ? "-" : "",
16419 GET_MODE_SIZE (output_memory_reference_mode));
16421 else if (GET_CODE (x) == PRE_MODIFY)
16423 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16424 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16425 asm_fprintf (stream, "#%wd]!",
16426 INTVAL (XEXP (XEXP (x, 1), 1)));
16428 asm_fprintf (stream, "%r]!",
16429 REGNO (XEXP (XEXP (x, 1), 1)));
16431 else if (GET_CODE (x) == POST_MODIFY)
16433 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16434 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16435 asm_fprintf (stream, "#%wd",
16436 INTVAL (XEXP (XEXP (x, 1), 1)));
16438 asm_fprintf (stream, "%r",
16439 REGNO (XEXP (XEXP (x, 1), 1)));
16441 else output_addr_const (stream, x);
16445 if (GET_CODE (x) == REG)
16446 asm_fprintf (stream, "[%r]", REGNO (x));
16447 else if (GET_CODE (x) == POST_INC)
16448 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16449 else if (GET_CODE (x) == PLUS)
16451 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16452 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16453 asm_fprintf (stream, "[%r, #%wd]",
16454 REGNO (XEXP (x, 0)),
16455 INTVAL (XEXP (x, 1)));
16457 asm_fprintf (stream, "[%r, %r]",
16458 REGNO (XEXP (x, 0)),
16459 REGNO (XEXP (x, 1)));
16462 output_addr_const (stream, x);
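/* Illustrative examples (not from the original source): in ARM or
   Thumb-2 state, (reg r0) prints as "[r0, #0]", (plus (reg r0)
   (const_int 4)) as "[r0, #4]" and a PRE_MODIFY adding #8 as
   "[r0, #8]!"; in Thumb-1 state a plain register prints as
   "[r0]". */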
16466 /* Target hook for indicating whether a punctuation character for
16467 TARGET_PRINT_OPERAND is valid. */
16468 static bool
16469 arm_print_operand_punct_valid_p (unsigned char code)
16471 return (code == '@' || code == '|' || code == '.'
16472 || code == '(' || code == ')' || code == '#'
16473 || (TARGET_32BIT && (code == '?'))
16474 || (TARGET_THUMB2 && (code == '!'))
16475 || (TARGET_THUMB && (code == '_')));
16478 /* Target hook for assembling integer objects. The ARM version needs to
16479 handle word-sized values specially. */
16480 static bool
16481 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16483 enum machine_mode mode;
16485 if (size == UNITS_PER_WORD && aligned_p)
16487 fputs ("\t.word\t", asm_out_file);
16488 output_addr_const (asm_out_file, x);
16490 /* Mark symbols as position independent. We only do this in the
16491 .text segment, not in the .data segment. */
16492 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16493 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16495 /* See legitimize_pic_address for an explanation of the
16496 TARGET_VXWORKS_RTP check. */
16497 if (TARGET_VXWORKS_RTP
16498 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16499 fputs ("(GOT)", asm_out_file);
16501 fputs ("(GOTOFF)", asm_out_file);
16503 fputc ('\n', asm_out_file);
16504 return true;
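/* For example (illustrative, not from the original source), the
   block above emits "\t.word\tfoo(GOT)" for a reference to a
   non-local symbol foo from a PIC constant table, and "(GOTOFF)"
   for a local one. */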
16507 mode = GET_MODE (x);
16509 if (arm_vector_mode_supported_p (mode))
16513 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16515 units = CONST_VECTOR_NUNITS (x);
16516 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16518 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16519 for (i = 0; i < units; i++)
16521 rtx elt = CONST_VECTOR_ELT (x, i);
16522 assemble_integer
16523 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16526 for (i = 0; i < units; i++)
16528 rtx elt = CONST_VECTOR_ELT (x, i);
16529 REAL_VALUE_TYPE rval;
16531 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16533 assemble_real
16534 (rval, GET_MODE_INNER (mode),
16535 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16541 return default_assemble_integer (x, size, aligned_p);
16544 static void
16545 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16549 if (!TARGET_AAPCS_BASED)
16552 default_named_section_asm_out_constructor
16553 : default_named_section_asm_out_destructor) (symbol, priority);
16557 /* Put these in the .init_array section, using a special relocation. */
16558 if (priority != DEFAULT_INIT_PRIORITY)
16561 sprintf (buf, "%s.%.5u",
16562 is_ctor ? ".init_array" : ".fini_array",
16563 priority);
16564 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16571 switch_to_section (s);
16572 assemble_align (POINTER_SIZE);
16573 fputs ("\t.word\t", asm_out_file);
16574 output_addr_const (asm_out_file, symbol);
16575 fputs ("(target1)\n", asm_out_file);
16578 /* Add a function to the list of static constructors. */
16580 static void
16581 arm_elf_asm_constructor (rtx symbol, int priority)
16583 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16586 /* Add a function to the list of static destructors. */
16588 static void
16589 arm_elf_asm_destructor (rtx symbol, int priority)
16591 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16594 /* A finite state machine takes care of noticing whether or not instructions
16595 can be conditionally executed, and thus decrease execution time and code
16596 size by deleting branch instructions. The fsm is controlled by
16597 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16599 /* The states of the fsm controlling condition codes are:
16600 0: normal, do nothing special
16601 1: make ASM_OUTPUT_OPCODE not output this instruction
16602 2: make ASM_OUTPUT_OPCODE not output this instruction
16603 3: make instructions conditional
16604 4: make instructions conditional
16606 State transitions (state->state by whom under condition):
16607 0 -> 1 final_prescan_insn if the `target' is a label
16608 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16609 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16610 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16611 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16612 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16613 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16614 (the target insn is arm_target_insn).
16616 If the jump clobbers the conditions then we use states 2 and 4.
16618 A similar thing can be done with conditional return insns.
16620 XXX In case the `target' is an unconditional branch, this conditionalising
16621 of the instructions always reduces code size, but not always execution
16622 time. But then, I want to reduce the code size to somewhere near what
16623 /bin/cc produces. */
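/* Illustrative example (not from the original source): a branch
   over a single instruction, such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is collapsed by this fsm into

	cmp	r0, #0
	addne	r1, r1, #1

   which is smaller and avoids the branch. */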
16625 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16626 instructions. When a COND_EXEC instruction is seen the subsequent
16627 instructions are scanned so that multiple conditional instructions can be
16628 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16629 specify the length and true/false mask for the IT block. These will be
16630 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
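/* Illustrative example (not from the original source): two insns
   predicated on EQ followed by one on NE can be merged into a
   single three-instruction IT block,

	itte	eq
	moveq	r0, #1
	moveq	r1, #0
	movne	r0, #0

   where the 't'/'e' suffixes mirror arm_condexec_mask. */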
16632 /* Returns the index of the ARM condition code string in
16633 `arm_condition_codes'. COMPARISON should be an rtx like
16634 `(eq (...) (...))'. */
16635 static enum arm_cond_code
16636 get_arm_condition_code (rtx comparison)
16638 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16639 enum arm_cond_code code;
16640 enum rtx_code comp_code = GET_CODE (comparison);
16642 if (GET_MODE_CLASS (mode) != MODE_CC)
16643 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16644 XEXP (comparison, 1));
16648 case CC_DNEmode: code = ARM_NE; goto dominance;
16649 case CC_DEQmode: code = ARM_EQ; goto dominance;
16650 case CC_DGEmode: code = ARM_GE; goto dominance;
16651 case CC_DGTmode: code = ARM_GT; goto dominance;
16652 case CC_DLEmode: code = ARM_LE; goto dominance;
16653 case CC_DLTmode: code = ARM_LT; goto dominance;
16654 case CC_DGEUmode: code = ARM_CS; goto dominance;
16655 case CC_DGTUmode: code = ARM_HI; goto dominance;
16656 case CC_DLEUmode: code = ARM_LS; goto dominance;
16657 case CC_DLTUmode: code = ARM_CC;
16660 gcc_assert (comp_code == EQ || comp_code == NE);
16662 if (comp_code == EQ)
16663 return ARM_INVERSE_CONDITION_CODE (code);
16669 case NE: return ARM_NE;
16670 case EQ: return ARM_EQ;
16671 case GE: return ARM_PL;
16672 case LT: return ARM_MI;
16673 default: gcc_unreachable ();
16679 case NE: return ARM_NE;
16680 case EQ: return ARM_EQ;
16681 default: gcc_unreachable ();
16687 case NE: return ARM_MI;
16688 case EQ: return ARM_PL;
16689 default: gcc_unreachable ();
16694 /* These encodings assume that AC=1 in the FPA system control
16695 byte. This allows us to handle all cases except UNEQ and
16696 LTGT. */
16699 case GE: return ARM_GE;
16700 case GT: return ARM_GT;
16701 case LE: return ARM_LS;
16702 case LT: return ARM_MI;
16703 case NE: return ARM_NE;
16704 case EQ: return ARM_EQ;
16705 case ORDERED: return ARM_VC;
16706 case UNORDERED: return ARM_VS;
16707 case UNLT: return ARM_LT;
16708 case UNLE: return ARM_LE;
16709 case UNGT: return ARM_HI;
16710 case UNGE: return ARM_PL;
16711 /* UNEQ and LTGT do not have a representation. */
16712 case UNEQ: /* Fall through. */
16713 case LTGT: /* Fall through. */
16714 default: gcc_unreachable ();
16720 case NE: return ARM_NE;
16721 case EQ: return ARM_EQ;
16722 case GE: return ARM_LE;
16723 case GT: return ARM_LT;
16724 case LE: return ARM_GE;
16725 case LT: return ARM_GT;
16726 case GEU: return ARM_LS;
16727 case GTU: return ARM_CC;
16728 case LEU: return ARM_CS;
16729 case LTU: return ARM_HI;
16730 default: gcc_unreachable ();
16736 case LTU: return ARM_CS;
16737 case GEU: return ARM_CC;
16738 default: gcc_unreachable ();
16744 case NE: return ARM_NE;
16745 case EQ: return ARM_EQ;
16746 case GEU: return ARM_CS;
16747 case GTU: return ARM_HI;
16748 case LEU: return ARM_LS;
16749 case LTU: return ARM_CC;
16750 default: gcc_unreachable ();
16756 case GE: return ARM_GE;
16757 case LT: return ARM_LT;
16758 case GEU: return ARM_CS;
16759 case LTU: return ARM_CC;
16760 default: gcc_unreachable ();
16766 case NE: return ARM_NE;
16767 case EQ: return ARM_EQ;
16768 case GE: return ARM_GE;
16769 case GT: return ARM_GT;
16770 case LE: return ARM_LE;
16771 case LT: return ARM_LT;
16772 case GEU: return ARM_CS;
16773 case GTU: return ARM_HI;
16774 case LEU: return ARM_LS;
16775 case LTU: return ARM_CC;
16776 default: gcc_unreachable ();
16779 default: gcc_unreachable ();
16783 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
16784 instructions. */
16785 void
16786 thumb2_final_prescan_insn (rtx insn)
16788 rtx first_insn = insn;
16789 rtx body = PATTERN (insn);
16791 enum arm_cond_code code;
16795 /* Remove the previous insn from the count of insns to be output. */
16796 if (arm_condexec_count)
16797 arm_condexec_count--;
16799 /* Nothing to do if we are already inside a conditional block. */
16800 if (arm_condexec_count)
16801 return;
16803 if (GET_CODE (body) != COND_EXEC)
16804 return;
16806 /* Conditional jumps are implemented directly. */
16807 if (GET_CODE (insn) == JUMP_INSN)
16808 return;
16810 predicate = COND_EXEC_TEST (body);
16811 arm_current_cc = get_arm_condition_code (predicate);
16813 n = get_attr_ce_count (insn);
16814 arm_condexec_count = 1;
16815 arm_condexec_mask = (1 << n) - 1;
16816 arm_condexec_masklen = n;
16817 /* See if subsequent instructions can be combined into the same block. */
16820 insn = next_nonnote_insn (insn);
16822 /* Jumping into the middle of an IT block is illegal, so a label or
16823 barrier terminates the block. */
16824 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
16825 break;
16827 body = PATTERN (insn);
16828 /* USE and CLOBBER aren't really insns, so just skip them. */
16829 if (GET_CODE (body) == USE
16830 || GET_CODE (body) == CLOBBER)
16831 continue;
16833 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16834 if (GET_CODE (body) != COND_EXEC)
16835 break;
16836 /* Allow up to 4 conditionally executed instructions in a block. */
16837 n = get_attr_ce_count (insn);
16838 if (arm_condexec_masklen + n > 4)
16839 break;
16841 predicate = COND_EXEC_TEST (body);
16842 code = get_arm_condition_code (predicate);
16843 mask = (1 << n) - 1;
16844 if (arm_current_cc == code)
16845 arm_condexec_mask |= (mask << arm_condexec_masklen);
16846 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
16847 break;
16849 arm_condexec_count++;
16850 arm_condexec_masklen += n;
16852 /* A jump must be the last instruction in a conditional block. */
16853 if (GET_CODE (insn) == JUMP_INSN)
16854 break;
16856 /* Restore recog_data (getting the attributes of other insns can
16857 destroy this array, but final.c assumes that it remains intact
16858 across this call). */
16859 extract_constrain_insn_cached (first_insn);
16862 void
16863 arm_final_prescan_insn (rtx insn)
16865 /* BODY will hold the body of INSN. */
16866 rtx body = PATTERN (insn);
16868 /* This will be 1 if trying to repeat the trick, and things need to be
16869 reversed if it appears to fail. */
16872 /* If we start with a return insn, we only succeed if we find another one. */
16873 int seeking_return = 0;
16875 /* START_INSN will hold the insn from where we start looking. This is the
16876 first insn after the following code_label if REVERSE is true. */
16877 rtx start_insn = insn;
16879 /* If in state 4, check if the target branch is reached, in order to
16880 change back to state 0. */
16881 if (arm_ccfsm_state == 4)
16883 if (insn == arm_target_insn)
16885 arm_target_insn = NULL;
16886 arm_ccfsm_state = 0;
16891 /* If in state 3, it is possible to repeat the trick, if this insn is an
16892 unconditional branch to a label, and immediately following this branch
16893 is the previous target label which is only used once, and the label this
16894 branch jumps to is not too far off. */
16895 if (arm_ccfsm_state == 3)
16897 if (simplejump_p (insn))
16899 start_insn = next_nonnote_insn (start_insn);
16900 if (GET_CODE (start_insn) == BARRIER)
16902 /* XXX Isn't this always a barrier? */
16903 start_insn = next_nonnote_insn (start_insn);
16905 if (GET_CODE (start_insn) == CODE_LABEL
16906 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16907 && LABEL_NUSES (start_insn) == 1)
16912 else if (GET_CODE (body) == RETURN)
16914 start_insn = next_nonnote_insn (start_insn);
16915 if (GET_CODE (start_insn) == BARRIER)
16916 start_insn = next_nonnote_insn (start_insn);
16917 if (GET_CODE (start_insn) == CODE_LABEL
16918 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16919 && LABEL_NUSES (start_insn) == 1)
16922 seeking_return = 1;
16931 gcc_assert (!arm_ccfsm_state || reverse);
16932 if (GET_CODE (insn) != JUMP_INSN)
16935 /* This jump might be paralleled with a clobber of the condition codes;
16936 the jump should always come first. */
16937 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16938 body = XVECEXP (body, 0, 0);
16940 if (reverse
16941 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16942 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16945 int fail = FALSE, succeed = FALSE;
16946 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16947 int then_not_else = TRUE;
16948 rtx this_insn = start_insn, label = 0;
16950 /* Register the insn jumped to. */
16953 if (!seeking_return)
16954 label = XEXP (SET_SRC (body), 0);
16956 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16957 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16958 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16960 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16961 then_not_else = FALSE;
16963 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16964 seeking_return = 1;
16965 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16967 seeking_return = 1;
16968 then_not_else = FALSE;
16971 gcc_unreachable ();
16973 /* See how many insns this branch skips, and what kind of insns. If all
16974 insns are okay, and the label or unconditional branch to the same
16975 label is not too far away, succeed. */
16976 for (insns_skipped = 0;
16977 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16981 this_insn = next_nonnote_insn (this_insn);
16985 switch (GET_CODE (this_insn))
16988 /* Succeed if it is the target label, otherwise fail since
16989 control falls in from somewhere else. */
16990 if (this_insn == label)
16992 arm_ccfsm_state = 1;
17000 /* Succeed if the following insn is the target label.
17002 If return insns are used then the last insn in a function
17003 will be a barrier. */
17004 this_insn = next_nonnote_insn (this_insn);
17005 if (this_insn && this_insn == label)
17007 arm_ccfsm_state = 1;
17015 /* The AAPCS says that conditional calls should not be
17016 used since they make interworking inefficient (the
17017 linker can't transform BL<cond> into BLX). That's
17018 only a problem if the machine has BLX. */
17025 /* Succeed if the following insn is the target label, or
17026 if the following two insns are a barrier and the
17027 target label. */
17028 this_insn = next_nonnote_insn (this_insn);
17029 if (this_insn && GET_CODE (this_insn) == BARRIER)
17030 this_insn = next_nonnote_insn (this_insn);
17032 if (this_insn && this_insn == label
17033 && insns_skipped < max_insns_skipped)
17035 arm_ccfsm_state = 1;
17043 /* If this is an unconditional branch to the same label, succeed.
17044 If it is to another label, do nothing. If it is conditional,
17045 fail. */
17046 /* XXX Probably, the tests for SET and the PC are
17049 scanbody = PATTERN (this_insn);
17050 if (GET_CODE (scanbody) == SET
17051 && GET_CODE (SET_DEST (scanbody)) == PC)
17053 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17054 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17056 arm_ccfsm_state = 2;
17059 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17062 /* Fail if a conditional return is undesirable (e.g. on a
17063 StrongARM), but still allow this if optimizing for size. */
17064 else if (GET_CODE (scanbody) == RETURN
17065 && !use_return_insn (TRUE, NULL)
17068 else if (GET_CODE (scanbody) == RETURN
17071 arm_ccfsm_state = 2;
17074 else if (GET_CODE (scanbody) == PARALLEL)
17076 switch (get_attr_conds (this_insn))
17086 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17091 /* Instructions using or affecting the condition codes make it
17092 fail. */
17093 scanbody = PATTERN (this_insn);
17094 if (!(GET_CODE (scanbody) == SET
17095 || GET_CODE (scanbody) == PARALLEL)
17096 || get_attr_conds (this_insn) != CONDS_NOCOND)
17099 /* A conditional Cirrus instruction must be followed by
17100 a non-Cirrus instruction. However, since we
17101 conditionalize instructions in this function and since,
17102 by the time we get here, we cannot add instructions
17103 (nops) because shorten_branches () has already been
17104 called, we disable conditionalizing Cirrus
17105 instructions to be safe. */
17106 if (GET_CODE (scanbody) != USE
17107 && GET_CODE (scanbody) != CLOBBER
17108 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17118 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17119 arm_target_label = CODE_LABEL_NUMBER (label);
17122 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17124 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17126 this_insn = next_nonnote_insn (this_insn);
17127 gcc_assert (!this_insn
17128 || (GET_CODE (this_insn) != BARRIER
17129 && GET_CODE (this_insn) != CODE_LABEL));
17133 /* Oh, dear! We ran off the end; give up. */
17134 extract_constrain_insn_cached (insn);
17135 arm_ccfsm_state = 0;
17136 arm_target_insn = NULL;
17139 arm_target_insn = this_insn;
17142 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17143 what it was. */
17145 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17147 if (reverse || then_not_else)
17148 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17151 /* Restore recog_data (getting the attributes of other insns can
17152 destroy this array, but final.c assumes that it remains intact
17153 across this call). */
17154 extract_constrain_insn_cached (insn);
17158 /* Output IT instructions. */
17159 void
17160 thumb2_asm_output_opcode (FILE * stream)
17165 if (arm_condexec_mask)
17167 for (n = 0; n < arm_condexec_masklen; n++)
17168 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17169 buff[n] = 0;
17170 asm_fprintf (stream, "i%s\t%s\n\t", buff,
17171 arm_condition_codes[arm_current_cc]);
17172 arm_condexec_mask = 0;
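/* Illustrative example (not from the original source): with
   arm_condexec_masklen == 3, arm_condexec_mask == 0x5 and
   arm_current_cc == ARM_EQ, buff becomes "tet" and the code above
   emits "itet\teq" ahead of the three conditional insns. */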
17176 /* Returns true if REGNO is a valid register
17177 for holding a quantity of type MODE. */
17178 int
17179 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17181 if (GET_MODE_CLASS (mode) == MODE_CC)
17182 return (regno == CC_REGNUM
17183 || (TARGET_HARD_FLOAT && TARGET_VFP
17184 && regno == VFPCC_REGNUM));
17187 /* For the Thumb we only allow values bigger than SImode in
17188 registers 0 - 6, so that there is always a second low
17189 register available to hold the upper part of the value.
17190 We probably ought to ensure that the register is the
17191 start of an even numbered register pair. */
17192 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17194 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17195 && IS_CIRRUS_REGNUM (regno))
17196 /* We have outlawed SI values in Cirrus registers because they
17197 reside in the lower 32 bits, but SF values reside in the
17198 upper 32 bits. This causes gcc all sorts of grief. We can't
17199 even split the registers into pairs because Cirrus SI values
17200 get sign extended to 64 bits -- aldyh. */
17201 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17203 if (TARGET_HARD_FLOAT && TARGET_VFP
17204 && IS_VFP_REGNUM (regno))
17206 if (mode == SFmode || mode == SImode)
17207 return VFP_REGNO_OK_FOR_SINGLE (regno);
17209 if (mode == DFmode)
17210 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17212 /* VFP registers can hold HFmode values, but there is no point in
17213 putting them there unless we have hardware conversion insns. */
17214 if (mode == HFmode)
17215 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17218 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17219 || (VALID_NEON_QREG_MODE (mode)
17220 && NEON_REGNO_OK_FOR_QUAD (regno))
17221 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17222 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17223 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17224 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17225 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17230 if (TARGET_REALLY_IWMMXT)
17232 if (IS_IWMMXT_GR_REGNUM (regno))
17233 return mode == SImode;
17235 if (IS_IWMMXT_REGNUM (regno))
17236 return VALID_IWMMXT_REG_MODE (mode);
17239 /* We allow almost any value to be stored in the general registers.
17240 Restrict doubleword quantities to even register pairs so that we can
17241 use ldrd. Do not allow very large Neon structure opaque modes in
17242 general registers; they would use too many. */
17243 if (regno <= LAST_ARM_REGNUM)
17244 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17245 && ARM_NUM_REGS (mode) <= 4;
17247 if (regno == FRAME_POINTER_REGNUM
17248 || regno == ARG_POINTER_REGNUM)
17249 /* We only allow integers in the fake hard registers. */
17250 return GET_MODE_CLASS (mode) == MODE_INT;
17252 /* The only registers left are the FPA registers
17253 which we only allow to hold FP values. */
17254 return (TARGET_HARD_FLOAT && TARGET_FPA
17255 && GET_MODE_CLASS (mode) == MODE_FLOAT
17256 && regno >= FIRST_FPA_REGNUM
17257 && regno <= LAST_FPA_REGNUM);
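/* For example (illustrative, not from the original source): with
   TARGET_LDRD, a DImode value is rejected in the odd-numbered r1
   but accepted in r2, enforcing the even-register-pair restriction
   described above. */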
17260 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17261 not used in arm mode. */
17263 enum reg_class
17264 arm_regno_class (int regno)
17268 if (regno == STACK_POINTER_REGNUM)
17270 if (regno == CC_REGNUM)
17277 if (TARGET_THUMB2 && regno < 8)
17280 if ( regno <= LAST_ARM_REGNUM
17281 || regno == FRAME_POINTER_REGNUM
17282 || regno == ARG_POINTER_REGNUM)
17283 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17285 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17286 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17288 if (IS_CIRRUS_REGNUM (regno))
17289 return CIRRUS_REGS;
17291 if (IS_VFP_REGNUM (regno))
17293 if (regno <= D7_VFP_REGNUM)
17294 return VFP_D0_D7_REGS;
17295 else if (regno <= LAST_LO_VFP_REGNUM)
17296 return VFP_LO_REGS;
17298 return VFP_HI_REGS;
17301 if (IS_IWMMXT_REGNUM (regno))
17302 return IWMMXT_REGS;
17304 if (IS_IWMMXT_GR_REGNUM (regno))
17305 return IWMMXT_GR_REGS;
17310 /* Handle a special case when computing the offset
17311 of an argument from the frame pointer. */
17312 int
17313 arm_debugger_arg_offset (int value, rtx addr)
17317 /* We are only interested if dbxout_parms() failed to compute the offset. */
17321 /* We can only cope with the case where the address is held in a register. */
17322 if (GET_CODE (addr) != REG)
17325 /* If we are using the frame pointer to point at the argument, then
17326 an offset of 0 is correct. */
17327 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17330 /* If we are using the stack pointer to point at the
17331 argument, then an offset of 0 is correct. */
17332 /* ??? Check this is consistent with thumb2 frame layout. */
17333 if ((TARGET_THUMB || !frame_pointer_needed)
17334 && REGNO (addr) == SP_REGNUM)
17337 /* Oh dear. The argument is pointed to by a register rather
17338 than being held in a register, or being stored at a known
17339 offset from the frame pointer. Since GDB only understands
17340 those two kinds of argument we must translate the address
17341 held in the register into an offset from the frame pointer.
17342 We do this by searching through the insns for the function
17343 looking to see where this register gets its value. If the
17344 register is initialized from the frame pointer plus an offset
17345 then we are in luck and we can continue, otherwise we give up.
17347 This code is exercised by producing debugging information
17348 for a function with arguments like this:
17350 double func (double a, double b, int c, double d) {return d;}
17352 Without this code the stab for parameter 'd' will be set to
17353 an offset of 0 from the frame pointer, rather than 8. */
17355 /* The if() statement says:
17357 If the insn is a normal instruction
17358 and if the insn is setting the value in a register
17359 and if the register being set is the register holding the address of the argument
17360 and if the address is computed by an addition
17361 that involves adding to a register
17362 which is the frame pointer
17363 a constant integer
17365 then... */
17367 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17369 if ( GET_CODE (insn) == INSN
17370 && GET_CODE (PATTERN (insn)) == SET
17371 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17372 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17373 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17374 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17375 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17378 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17387 warning (0, "unable to compute real location of stacked parameter");
17388 value = 8; /* XXX magic hack */
17394 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17397 if ((MASK) & insn_flags) \
17398 add_builtin_function ((NAME), (TYPE), (CODE), \
17399 BUILT_IN_MD, NULL, NULL_TREE); \
17403 struct builtin_description
17405 const unsigned int mask;
17406 const enum insn_code icode;
17407 const char * const name;
17408 const enum arm_builtins code;
17409 const enum rtx_code comparison;
17410 const unsigned int flag;
17413 static const struct builtin_description bdesc_2arg[] =
17415 #define IWMMXT_BUILTIN(code, string, builtin) \
17416 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17417 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
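/* For example (illustrative, not from the original source), the
   first entry below expands to
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, UNKNOWN, 0 }. */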
17419 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17420 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17421 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17422 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17423 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17424 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17425 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17426 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17427 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17428 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17429 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17430 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17431 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17432 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17433 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17434 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17435 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17436 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17437 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17438 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17439 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17440 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17441 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17442 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17443 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17444 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17445 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17446 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17447 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17448 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17449 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17450 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17451 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17452 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17453 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17454 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17455 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17456 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17457 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17458 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17459 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17460 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17461 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17462 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17463 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17464 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17465 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17466 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17467 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17468 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17469 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17470 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17471 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17472 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17473 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17474 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17475 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17476 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17478 #define IWMMXT_BUILTIN2(code, builtin) \
17479 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17481 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17482 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17483 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17484 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17485 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17486 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17487 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17488 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17489 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17490 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17491 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17492 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17493 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17494 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17495 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17496 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17497 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17498 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17499 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17500 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17501 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17502 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17503 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17504 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17505 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17506 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17507 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17508 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17509 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17510 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17511 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17512 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17515 static const struct builtin_description bdesc_1arg[] =
17517 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17518 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17519 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17520 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17521 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17522 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17523 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17524 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17525 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17526 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17527 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17528 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17529 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17530 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17531 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17532 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17533 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17534 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17537 /* Set up all the iWMMXt builtins. This is
17538 not called if TARGET_IWMMXT is zero. */
17540 static void
17541 arm_init_iwmmxt_builtins (void)
17543 const struct builtin_description * d;
17545 tree endlink = void_list_node;
17547 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17548 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17549 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17551 tree int_ftype_int
17552 = build_function_type (integer_type_node,
17553 tree_cons (NULL_TREE, integer_type_node, endlink));
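/* Each of these trees spells out a C prototype as a tree_cons chain
   terminated by endlink; int_ftype_int above corresponds to
   "int f (int)". */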
17554 tree v8qi_ftype_v8qi_v8qi_int
17555 = build_function_type (V8QI_type_node,
17556 tree_cons (NULL_TREE, V8QI_type_node,
17557 tree_cons (NULL_TREE, V8QI_type_node,
17558 tree_cons (NULL_TREE,
17561 tree v4hi_ftype_v4hi_int
17562 = build_function_type (V4HI_type_node,
17563 tree_cons (NULL_TREE, V4HI_type_node,
17564 tree_cons (NULL_TREE, integer_type_node,
17566 tree v2si_ftype_v2si_int
17567 = build_function_type (V2SI_type_node,
17568 tree_cons (NULL_TREE, V2SI_type_node,
17569 tree_cons (NULL_TREE, integer_type_node,
17571 tree v2si_ftype_di_di
17572 = build_function_type (V2SI_type_node,
17573 tree_cons (NULL_TREE, long_long_integer_type_node,
17574 tree_cons (NULL_TREE, long_long_integer_type_node,
17576 tree di_ftype_di_int
17577 = build_function_type (long_long_integer_type_node,
17578 tree_cons (NULL_TREE, long_long_integer_type_node,
17579 tree_cons (NULL_TREE, integer_type_node,
17581 tree di_ftype_di_int_int
17582 = build_function_type (long_long_integer_type_node,
17583 tree_cons (NULL_TREE, long_long_integer_type_node,
17584 tree_cons (NULL_TREE, integer_type_node,
17585 tree_cons (NULL_TREE,
17588 tree int_ftype_v8qi
17589 = build_function_type (integer_type_node,
17590 tree_cons (NULL_TREE, V8QI_type_node,
17592 tree int_ftype_v4hi
17593 = build_function_type (integer_type_node,
17594 tree_cons (NULL_TREE, V4HI_type_node,
17596 tree int_ftype_v2si
17597 = build_function_type (integer_type_node,
17598 tree_cons (NULL_TREE, V2SI_type_node,
17600 tree int_ftype_v8qi_int
17601 = build_function_type (integer_type_node,
17602 tree_cons (NULL_TREE, V8QI_type_node,
17603 tree_cons (NULL_TREE, integer_type_node,
17605 tree int_ftype_v4hi_int
17606 = build_function_type (integer_type_node,
17607 tree_cons (NULL_TREE, V4HI_type_node,
17608 tree_cons (NULL_TREE, integer_type_node,
17610 tree int_ftype_v2si_int
17611 = build_function_type (integer_type_node,
17612 tree_cons (NULL_TREE, V2SI_type_node,
17613 tree_cons (NULL_TREE, integer_type_node,
17615 tree v8qi_ftype_v8qi_int_int
17616 = build_function_type (V8QI_type_node,
17617 tree_cons (NULL_TREE, V8QI_type_node,
17618 tree_cons (NULL_TREE, integer_type_node,
17619 tree_cons (NULL_TREE,
17622 tree v4hi_ftype_v4hi_int_int
17623 = build_function_type (V4HI_type_node,
17624 tree_cons (NULL_TREE, V4HI_type_node,
17625 tree_cons (NULL_TREE, integer_type_node,
17626 tree_cons (NULL_TREE,
17629 tree v2si_ftype_v2si_int_int
17630 = build_function_type (V2SI_type_node,
17631 tree_cons (NULL_TREE, V2SI_type_node,
17632 tree_cons (NULL_TREE, integer_type_node,
17633 tree_cons (NULL_TREE,
17636 /* Miscellaneous. */
17637 tree v8qi_ftype_v4hi_v4hi
17638 = build_function_type (V8QI_type_node,
17639 tree_cons (NULL_TREE, V4HI_type_node,
17640 tree_cons (NULL_TREE, V4HI_type_node,
17642 tree v4hi_ftype_v2si_v2si
17643 = build_function_type (V4HI_type_node,
17644 tree_cons (NULL_TREE, V2SI_type_node,
17645 tree_cons (NULL_TREE, V2SI_type_node,
17647 tree v2si_ftype_v4hi_v4hi
17648 = build_function_type (V2SI_type_node,
17649 tree_cons (NULL_TREE, V4HI_type_node,
17650 tree_cons (NULL_TREE, V4HI_type_node,
17652 tree v2si_ftype_v8qi_v8qi
17653 = build_function_type (V2SI_type_node,
17654 tree_cons (NULL_TREE, V8QI_type_node,
17655 tree_cons (NULL_TREE, V8QI_type_node,
17657 tree v4hi_ftype_v4hi_di
17658 = build_function_type (V4HI_type_node,
17659 tree_cons (NULL_TREE, V4HI_type_node,
17660 tree_cons (NULL_TREE,
17661 long_long_integer_type_node,
17663 tree v2si_ftype_v2si_di
17664 = build_function_type (V2SI_type_node,
17665 tree_cons (NULL_TREE, V2SI_type_node,
17666 tree_cons (NULL_TREE,
17667 long_long_integer_type_node,
17669 tree void_ftype_int_int
17670 = build_function_type (void_type_node,
17671 tree_cons (NULL_TREE, integer_type_node,
17672 tree_cons (NULL_TREE, integer_type_node,
17673 endlink)));
17674 tree di_ftype_void
17675 = build_function_type (long_long_unsigned_type_node, endlink);
17676 tree di_ftype_v8qi
17677 = build_function_type (long_long_integer_type_node,
17678 tree_cons (NULL_TREE, V8QI_type_node,
17679 endlink));
17680 tree di_ftype_v4hi
17681 = build_function_type (long_long_integer_type_node,
17682 tree_cons (NULL_TREE, V4HI_type_node,
17683 endlink));
17684 tree di_ftype_v2si
17685 = build_function_type (long_long_integer_type_node,
17686 tree_cons (NULL_TREE, V2SI_type_node,
17688 tree v2si_ftype_v4hi
17689 = build_function_type (V2SI_type_node,
17690 tree_cons (NULL_TREE, V4HI_type_node,
17692 tree v4hi_ftype_v8qi
17693 = build_function_type (V4HI_type_node,
17694 tree_cons (NULL_TREE, V8QI_type_node,
17697 tree di_ftype_di_v4hi_v4hi
17698 = build_function_type (long_long_unsigned_type_node,
17699 tree_cons (NULL_TREE,
17700 long_long_unsigned_type_node,
17701 tree_cons (NULL_TREE, V4HI_type_node,
17702 tree_cons (NULL_TREE,
17706 tree di_ftype_v4hi_v4hi
17707 = build_function_type (long_long_unsigned_type_node,
17708 tree_cons (NULL_TREE, V4HI_type_node,
17709 tree_cons (NULL_TREE, V4HI_type_node,
17712 /* Normal vector binops. */
17713 tree v8qi_ftype_v8qi_v8qi
17714 = build_function_type (V8QI_type_node,
17715 tree_cons (NULL_TREE, V8QI_type_node,
17716 tree_cons (NULL_TREE, V8QI_type_node,
17718 tree v4hi_ftype_v4hi_v4hi
17719 = build_function_type (V4HI_type_node,
17720 tree_cons (NULL_TREE, V4HI_type_node,
17721 tree_cons (NULL_TREE, V4HI_type_node,
17723 tree v2si_ftype_v2si_v2si
17724 = build_function_type (V2SI_type_node,
17725 tree_cons (NULL_TREE, V2SI_type_node,
17726 tree_cons (NULL_TREE, V2SI_type_node,
17728 tree di_ftype_di_di
17729 = build_function_type (long_long_unsigned_type_node,
17730 tree_cons (NULL_TREE, long_long_unsigned_type_node,
17731 tree_cons (NULL_TREE,
17732 long_long_unsigned_type_node,
17735 /* Add all builtins that are more or less simple operations on two
17736 operands. */
17737 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17739 /* Use one of the operands; the target can have a different mode for
17740 mask-generating compares. */
17741 enum machine_mode mode;
17747 mode = insn_data[d->icode].operand[1].mode;
17752 type = v8qi_ftype_v8qi_v8qi;
17755 type = v4hi_ftype_v4hi_v4hi;
17758 type = v2si_ftype_v2si_v2si;
17761 type = di_ftype_di_di;
17765 gcc_unreachable ();
17768 def_mbuiltin (d->mask, d->name, type, d->code);
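/* For example (illustrative, not from the original source), the
   addv8qi3 entry has a V8QImode operand, so the loop above
   registers "__builtin_arm_waddb" with type
   v8qi_ftype_v8qi_v8qi. */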
17771 /* Add the remaining MMX insns with somewhat more complicated types. */
17772 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
17773 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
17774 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
17776 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
17777 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
17778 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
17779 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
17780 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
17781 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
17783 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
17784 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
17785 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
17786 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
17787 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
17788 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
17790 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
17791 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
17792 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
17793 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
17794 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
17795 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
17797 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
17798 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
17799 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
17800 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
17801 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
17802 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
17804 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
17806 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
17807 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
17808 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
17809 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
17811 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
17812 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
17813 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
17814 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
17815 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17816 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17817 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17818 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17819 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17821 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17822 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17823 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17825 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17826 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17827 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17829 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17830 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17831 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17832 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17833 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17834 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17836 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17837 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17838 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17839 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17840 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17841 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17842 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17843 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17844 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17845 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17846 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17847 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17849 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17850 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17851 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17852 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17854 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17855 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17856 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17857 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17858 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17859 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17860 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17863 static void
17864 arm_init_tls_builtins (void)
17868 ftype = build_function_type (ptr_type_node, void_list_node);
17869 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17870 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17872 TREE_NOTHROW (decl) = 1;
17873 TREE_READONLY (decl) = 1;
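/* Illustrative usage (added commentary, not in the original source): once
   this builtin is registered, user code can fetch the TLS base directly,
   e.g.

     void *tp = __builtin_thread_pointer ();

   which is expanded via arm_load_tp (see arm_expand_builtin below) rather
   than through an ordinary library call. */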
17876 enum neon_builtin_type_bits {
17892 #define v8qi_UP T_V8QI
17893 #define v4hi_UP T_V4HI
17894 #define v2si_UP T_V2SI
17895 #define v2sf_UP T_V2SF
17897 #define v16qi_UP T_V16QI
17898 #define v8hi_UP T_V8HI
17899 #define v4si_UP T_V4SI
17900 #define v4sf_UP T_V4SF
17901 #define v2di_UP T_V2DI
17906 #define UP(X) X##_UP
17941 NEON_LOADSTRUCTLANE,
17943 NEON_STORESTRUCTLANE,
17950 typedef struct {
17951 const char *name;
17952 const neon_itype itype;
17953 const int bits;
17954 const enum insn_code codes[T_MAX];
17955 const unsigned int num_vars;
17956 unsigned int base_fcode;
17957 } neon_builtin_datum;
17959 #define CF(N,X) CODE_FOR_neon_##N##X
17961 #define VAR1(T, N, A) \
17962 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17963 #define VAR2(T, N, A, B) \
17964 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17965 #define VAR3(T, N, A, B, C) \
17966 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17967 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17968 #define VAR4(T, N, A, B, C, D) \
17969 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17970 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17971 #define VAR5(T, N, A, B, C, D, E) \
17972 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17973 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17974 #define VAR6(T, N, A, B, C, D, E, F) \
17975 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17976 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17977 #define VAR7(T, N, A, B, C, D, E, F, G) \
17978 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17979 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17980 CF (N, G) }, 7, 0
17981 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17982 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17983 | UP (H), \
17984 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17985 CF (N, G), CF (N, H) }, 8, 0
17986 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17987 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17988 | UP (H) | UP (I), \
17989 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17990 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17991 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17992 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17993 | UP (H) | UP (I) | UP (J), \
17994 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17995 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
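/* Illustrative expansion (added commentary, not in the original source):
   with the macros above, a table entry such as

     VAR2 (BINOP, vcage, v2sf, v4sf)

   expands to the initializer

     "vcage", NEON_BINOP, T_V2SF | T_V4SF,
     { CODE_FOR_neon_vcagev2sf, CODE_FOR_neon_vcagev4sf }, 2, 0

   i.e. a name, an itype classification, a bitmask of type variants, the
   matching insn codes, the variant count, and a base fcode that
   arm_init_neon_builtins fills in later. */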
17997 /* The mode entries in the following table correspond to the "key" type of the
17998 instruction variant, i.e. the type that would be specified after the
17999 assembler mnemonic, which usually refers to the last vector operand.
18000 (Signed, unsigned and polynomial types are not differentiated, though;
18001 they are all mapped onto the same mode for a given element size.) The modes
18002 listed per instruction should be the same as those defined for that
18003 instruction's pattern in neon.md.
18004 WARNING: Variants must be listed in the same increasing order as
18005 neon_builtin_type_bits. */
18007 static neon_builtin_datum neon_builtin_data[] =
18008 {
18009 { VAR10 (BINOP, vadd,
18010 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18011 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18012 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18013 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18014 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18015 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18016 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18017 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18018 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18019 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18020 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18021 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18022 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18023 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18024 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18025 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18026 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18027 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18028 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18029 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18030 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18031 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18032 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18033 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18034 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18035 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18036 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18037 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18038 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18039 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18040 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18041 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18042 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18043 { VAR10 (BINOP, vsub,
18044 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18045 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18046 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18047 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18048 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18049 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18050 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18051 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18052 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18053 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18054 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18055 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18056 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18057 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18058 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18059 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18060 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18061 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18062 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18063 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18064 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18065 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18066 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18067 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18068 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18069 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18070 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18071 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18072 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18073 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18074 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18075 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18076 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18077 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18078 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18079 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18080 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18081 /* FIXME: vget_lane supports more variants than this! */
18082 { VAR10 (GETLANE, vget_lane,
18083 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18084 { VAR10 (SETLANE, vset_lane,
18085 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18086 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18087 { VAR10 (DUP, vdup_n,
18088 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18089 { VAR10 (DUPLANE, vdup_lane,
18090 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18091 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18092 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18093 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18094 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18095 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18096 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18097 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18098 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18099 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18100 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18101 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18102 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18103 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18104 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18105 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18106 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18107 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18108 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18109 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18110 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18111 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18112 { VAR10 (BINOP, vext,
18113 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18114 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18115 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18116 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18117 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18118 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18119 { VAR10 (SELECT, vbsl,
18120 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18121 { VAR1 (VTBL, vtbl1, v8qi) },
18122 { VAR1 (VTBL, vtbl2, v8qi) },
18123 { VAR1 (VTBL, vtbl3, v8qi) },
18124 { VAR1 (VTBL, vtbl4, v8qi) },
18125 { VAR1 (VTBX, vtbx1, v8qi) },
18126 { VAR1 (VTBX, vtbx2, v8qi) },
18127 { VAR1 (VTBX, vtbx3, v8qi) },
18128 { VAR1 (VTBX, vtbx4, v8qi) },
18129 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18130 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18131 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18132 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18133 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18134 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18135 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18136 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18137 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18138 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18139 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18140 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18141 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18142 { VAR10 (LOAD1, vld1,
18143 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18144 { VAR10 (LOAD1LANE, vld1_lane,
18145 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18146 { VAR10 (LOAD1, vld1_dup,
18147 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18148 { VAR10 (STORE1, vst1,
18149 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18150 { VAR10 (STORE1LANE, vst1_lane,
18151 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18152 { VAR9 (LOADSTRUCT,
18153 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18154 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18155 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18156 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18157 { VAR9 (STORESTRUCT, vst2,
18158 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18159 { VAR7 (STORESTRUCTLANE, vst2_lane,
18160 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18161 { VAR9 (LOADSTRUCT,
18162 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18163 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18164 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18165 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18166 { VAR9 (STORESTRUCT, vst3,
18167 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18168 { VAR7 (STORESTRUCTLANE, vst3_lane,
18169 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18170 { VAR9 (LOADSTRUCT, vld4,
18171 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18172 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18173 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18174 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18175 { VAR9 (STORESTRUCT, vst4,
18176 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18177 { VAR7 (STORESTRUCTLANE, vst4_lane,
18178 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18179 { VAR10 (LOGICBINOP, vand,
18180 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18181 { VAR10 (LOGICBINOP, vorr,
18182 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18183 { VAR10 (BINOP, veor,
18184 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18185 { VAR10 (LOGICBINOP, vbic,
18186 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18187 { VAR10 (LOGICBINOP, vorn,
18188 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18189 };
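/* Note on the WARNING above (added commentary, not in the original source):
   the ordering matters because arm_init_neon_builtins walks the T_* bits
   from least to most significant and consumes each entry's codes[] array
   sequentially (see the codeidx++ in that function), so a mis-ordered
   variant list would silently pair a variant with the wrong insn code. */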
18203 static void
18204 arm_init_neon_builtins (void)
18206 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18208 tree neon_intQI_type_node;
18209 tree neon_intHI_type_node;
18210 tree neon_polyQI_type_node;
18211 tree neon_polyHI_type_node;
18212 tree neon_intSI_type_node;
18213 tree neon_intDI_type_node;
18214 tree neon_float_type_node;
18216 tree intQI_pointer_node;
18217 tree intHI_pointer_node;
18218 tree intSI_pointer_node;
18219 tree intDI_pointer_node;
18220 tree float_pointer_node;
18222 tree const_intQI_node;
18223 tree const_intHI_node;
18224 tree const_intSI_node;
18225 tree const_intDI_node;
18226 tree const_float_node;
18228 tree const_intQI_pointer_node;
18229 tree const_intHI_pointer_node;
18230 tree const_intSI_pointer_node;
18231 tree const_intDI_pointer_node;
18232 tree const_float_pointer_node;
18234 tree V8QI_type_node;
18235 tree V4HI_type_node;
18236 tree V2SI_type_node;
18237 tree V2SF_type_node;
18238 tree V16QI_type_node;
18239 tree V8HI_type_node;
18240 tree V4SI_type_node;
18241 tree V4SF_type_node;
18242 tree V2DI_type_node;
18244 tree intUQI_type_node;
18245 tree intUHI_type_node;
18246 tree intUSI_type_node;
18247 tree intUDI_type_node;
18249 tree intEI_type_node;
18250 tree intOI_type_node;
18251 tree intCI_type_node;
18252 tree intXI_type_node;
18254 tree V8QI_pointer_node;
18255 tree V4HI_pointer_node;
18256 tree V2SI_pointer_node;
18257 tree V2SF_pointer_node;
18258 tree V16QI_pointer_node;
18259 tree V8HI_pointer_node;
18260 tree V4SI_pointer_node;
18261 tree V4SF_pointer_node;
18262 tree V2DI_pointer_node;
18264 tree void_ftype_pv8qi_v8qi_v8qi;
18265 tree void_ftype_pv4hi_v4hi_v4hi;
18266 tree void_ftype_pv2si_v2si_v2si;
18267 tree void_ftype_pv2sf_v2sf_v2sf;
18268 tree void_ftype_pdi_di_di;
18269 tree void_ftype_pv16qi_v16qi_v16qi;
18270 tree void_ftype_pv8hi_v8hi_v8hi;
18271 tree void_ftype_pv4si_v4si_v4si;
18272 tree void_ftype_pv4sf_v4sf_v4sf;
18273 tree void_ftype_pv2di_v2di_v2di;
18275 tree reinterp_ftype_dreg[5][5];
18276 tree reinterp_ftype_qreg[5][5];
18277 tree dreg_types[5], qreg_types[5];
18279 /* Create distinguished type nodes for NEON vector element types,
18280 and pointers to values of such types, so we can detect them later. */
18281 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18282 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18283 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18284 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18285 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18286 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18287 neon_float_type_node = make_node (REAL_TYPE);
18288 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18289 layout_type (neon_float_type_node);
18291 /* Define typedefs which exactly correspond to the modes we are basing vector
18292 types on. If you change these names you'll need to change
18293 the table used by arm_mangle_type too. */
18294 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18295 "__builtin_neon_qi");
18296 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18297 "__builtin_neon_hi");
18298 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18299 "__builtin_neon_si");
18300 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18301 "__builtin_neon_sf");
18302 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18303 "__builtin_neon_di");
18304 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18305 "__builtin_neon_poly8");
18306 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18307 "__builtin_neon_poly16");
18309 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18310 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18311 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18312 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18313 float_pointer_node = build_pointer_type (neon_float_type_node);
18315 /* Next create constant-qualified versions of the above types. */
18316 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18317 TYPE_QUAL_CONST);
18318 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18319 TYPE_QUAL_CONST);
18320 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18321 TYPE_QUAL_CONST);
18322 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18323 TYPE_QUAL_CONST);
18324 const_float_node = build_qualified_type (neon_float_type_node,
18325 TYPE_QUAL_CONST);
18327 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18328 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18329 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18330 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18331 const_float_pointer_node = build_pointer_type (const_float_node);
18333 /* Now create vector types based on our NEON element types. */
18334 /* 64-bit vectors. */
18335 V8QI_type_node =
18336 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18337 V4HI_type_node =
18338 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18339 V2SI_type_node =
18340 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18341 V2SF_type_node =
18342 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18343 /* 128-bit vectors. */
18344 V16QI_type_node =
18345 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18346 V8HI_type_node =
18347 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18348 V4SI_type_node =
18349 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18350 V4SF_type_node =
18351 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18352 V2DI_type_node =
18353 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18355 /* Unsigned integer types for various mode sizes. */
18356 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18357 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18358 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18359 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18361 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18362 "__builtin_neon_uqi");
18363 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18364 "__builtin_neon_uhi");
18365 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18366 "__builtin_neon_usi");
18367 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18368 "__builtin_neon_udi");
18370 /* Opaque integer types for structures of vectors. */
18371 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18372 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18373 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18374 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18376 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18377 "__builtin_neon_ti");
18378 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18379 "__builtin_neon_ei");
18380 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18381 "__builtin_neon_oi");
18382 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18383 "__builtin_neon_ci");
18384 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18385 "__builtin_neon_xi");
18387 /* Pointers to vector types. */
18388 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18389 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18390 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18391 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18392 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18393 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18394 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18395 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18396 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18398 /* Operations which return results as pairs. */
18399 void_ftype_pv8qi_v8qi_v8qi =
18400 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18401 V8QI_type_node, NULL);
18402 void_ftype_pv4hi_v4hi_v4hi =
18403 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18404 V4HI_type_node, NULL);
18405 void_ftype_pv2si_v2si_v2si =
18406 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18407 V2SI_type_node, NULL);
18408 void_ftype_pv2sf_v2sf_v2sf =
18409 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18410 V2SF_type_node, NULL);
18411 void_ftype_pdi_di_di =
18412 build_function_type_list (void_type_node, intDI_pointer_node,
18413 neon_intDI_type_node, neon_intDI_type_node, NULL);
18414 void_ftype_pv16qi_v16qi_v16qi =
18415 build_function_type_list (void_type_node, V16QI_pointer_node,
18416 V16QI_type_node, V16QI_type_node, NULL);
18417 void_ftype_pv8hi_v8hi_v8hi =
18418 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18419 V8HI_type_node, NULL);
18420 void_ftype_pv4si_v4si_v4si =
18421 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18422 V4SI_type_node, NULL);
18423 void_ftype_pv4sf_v4sf_v4sf =
18424 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18425 V4SF_type_node, NULL);
18426 void_ftype_pv2di_v2di_v2di =
18427 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18428 V2DI_type_node, NULL);
18430 dreg_types[0] = V8QI_type_node;
18431 dreg_types[1] = V4HI_type_node;
18432 dreg_types[2] = V2SI_type_node;
18433 dreg_types[3] = V2SF_type_node;
18434 dreg_types[4] = neon_intDI_type_node;
18436 qreg_types[0] = V16QI_type_node;
18437 qreg_types[1] = V8HI_type_node;
18438 qreg_types[2] = V4SI_type_node;
18439 qreg_types[3] = V4SF_type_node;
18440 qreg_types[4] = V2DI_type_node;
18442 for (i = 0; i < 5; i++)
18445 for (j = 0; j < 5; j++)
18447 reinterp_ftype_dreg[i][j]
18448 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18449 reinterp_ftype_qreg[i][j]
18450 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
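/* For example (added note, not in the original source):
   reinterp_ftype_dreg[0][3] is the type "V8QI (V2SF)", which later backs
   the builtin __builtin_neon_vreinterpretv8qiv2sf. */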
18454 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18456 neon_builtin_datum *d = &neon_builtin_data[i];
18457 unsigned int j, codeidx = 0;
18459 d->base_fcode = fcode;
18461 for (j = 0; j < T_MAX; j++)
18463 const char* const modenames[] = {
18464 "v8qi", "v4hi", "v2si", "v2sf", "di",
18465 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18469 enum insn_code icode;
18470 int is_load = 0, is_store = 0;
18472 if ((d->bits & (1 << j)) == 0)
18475 icode = d->codes[codeidx++];
18480 case NEON_LOAD1LANE:
18481 case NEON_LOADSTRUCT:
18482 case NEON_LOADSTRUCTLANE:
18484 /* Fall through. */
18486 case NEON_STORE1LANE:
18487 case NEON_STORESTRUCT:
18488 case NEON_STORESTRUCTLANE:
18491 /* Fall through. */
18494 case NEON_LOGICBINOP:
18495 case NEON_SHIFTINSERT:
18502 case NEON_SHIFTIMM:
18503 case NEON_SHIFTACC:
18509 case NEON_LANEMULL:
18510 case NEON_LANEMULH:
18512 case NEON_SCALARMUL:
18513 case NEON_SCALARMULL:
18514 case NEON_SCALARMULH:
18515 case NEON_SCALARMAC:
18521 tree return_type = void_type_node, args = void_list_node;
18523 /* Build a function type directly from the insn_data for this
18524 builtin. The build_function_type() function takes care of
18525 removing duplicates for us. */
18526 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
18530 if (is_load && k == 1)
18532 /* Neon load patterns always have the memory operand
18533 (a SImode pointer) in the operand 1 position. We
18534 want a const pointer to the element type in that
18535 position. */
18536 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18542 eltype = const_intQI_pointer_node;
18547 eltype = const_intHI_pointer_node;
18552 eltype = const_intSI_pointer_node;
18557 eltype = const_float_pointer_node;
18562 eltype = const_intDI_pointer_node;
18565 default: gcc_unreachable ();
18568 else if (is_store && k == 0)
18570 /* Similarly, Neon store patterns use operand 0 as
18571 the memory location to store to (a SImode pointer).
18572 Use a pointer to the element type of the store in
18573 that position. */
18574 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18580 eltype = intQI_pointer_node;
18585 eltype = intHI_pointer_node;
18590 eltype = intSI_pointer_node;
18595 eltype = float_pointer_node;
18600 eltype = intDI_pointer_node;
18603 default: gcc_unreachable ();
18608 switch (insn_data[icode].operand[k].mode)
18610 case VOIDmode: eltype = void_type_node; break;
18612 case QImode: eltype = neon_intQI_type_node; break;
18613 case HImode: eltype = neon_intHI_type_node; break;
18614 case SImode: eltype = neon_intSI_type_node; break;
18615 case SFmode: eltype = neon_float_type_node; break;
18616 case DImode: eltype = neon_intDI_type_node; break;
18617 case TImode: eltype = intTI_type_node; break;
18618 case EImode: eltype = intEI_type_node; break;
18619 case OImode: eltype = intOI_type_node; break;
18620 case CImode: eltype = intCI_type_node; break;
18621 case XImode: eltype = intXI_type_node; break;
18622 /* 64-bit vectors. */
18623 case V8QImode: eltype = V8QI_type_node; break;
18624 case V4HImode: eltype = V4HI_type_node; break;
18625 case V2SImode: eltype = V2SI_type_node; break;
18626 case V2SFmode: eltype = V2SF_type_node; break;
18627 /* 128-bit vectors. */
18628 case V16QImode: eltype = V16QI_type_node; break;
18629 case V8HImode: eltype = V8HI_type_node; break;
18630 case V4SImode: eltype = V4SI_type_node; break;
18631 case V4SFmode: eltype = V4SF_type_node; break;
18632 case V2DImode: eltype = V2DI_type_node; break;
18633 default: gcc_unreachable ();
18637 if (k == 0 && !is_store)
18638 return_type = eltype;
18640 args = tree_cons (NULL_TREE, eltype, args);
18643 ftype = build_function_type (return_type, args);
18647 case NEON_RESULTPAIR:
18649 switch (insn_data[icode].operand[1].mode)
18651 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18652 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18653 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18654 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18655 case DImode: ftype = void_ftype_pdi_di_di; break;
18656 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18657 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18658 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18659 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18660 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18661 default: gcc_unreachable ();
18666 case NEON_REINTERP:
18668 /* We iterate over 5 doubleword types, then 5 quadword
18669 types. */
18670 int rhs = j % 5;
18671 switch (insn_data[icode].operand[0].mode)
18673 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18674 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18675 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18676 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18677 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18678 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18679 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18680 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18681 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18682 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18683 default: gcc_unreachable ();
18689 gcc_unreachable ();
18692 gcc_assert (ftype != NULL);
18694 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
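/* e.g. (added note) the vadd entry combined with the T_V8QI variant
   produces the name "__builtin_neon_vaddv8qi". */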
18696 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
18703 arm_init_fp16_builtins (void)
18705 tree fp16_type = make_node (REAL_TYPE);
18706 TYPE_PRECISION (fp16_type) = 16;
18707 layout_type (fp16_type);
18708 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
18712 arm_init_builtins (void)
18714 arm_init_tls_builtins ();
18716 if (TARGET_REALLY_IWMMXT)
18717 arm_init_iwmmxt_builtins ();
18720 arm_init_neon_builtins ();
18722 if (arm_fp16_format)
18723 arm_init_fp16_builtins ();
18726 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18728 static const char *
18729 arm_invalid_parameter_type (const_tree t)
18731 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18732 return N_("function parameters cannot have __fp16 type");
18736 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18738 static const char *
18739 arm_invalid_return_type (const_tree t)
18741 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18742 return N_("functions cannot return __fp16 type");
18746 /* Implement TARGET_PROMOTED_TYPE. */
18749 arm_promoted_type (const_tree t)
18751 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
18752 return float_type_node;
18756 /* Implement TARGET_CONVERT_TO_TYPE.
18757 Specifically, this hook implements the peculiarity of the ARM
18758 half-precision floating-point C semantics that requires conversions between
18759 __fp16 to or from double to do an intermediate conversion to float. */
18762 arm_convert_to_type (tree type, tree expr)
18764 tree fromtype = TREE_TYPE (expr);
18765 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
18767 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
18768 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
18769 return convert (type, convert (float_type_node, expr));
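/* Illustrative example (added commentary, not in the original source):
   under this hook a cast such as

     double d = 1.0;
     __fp16 h = (__fp16) d;

   is rewritten as (__fp16) (float) d, i.e. double -> float -> __fp16;
   a widening __fp16 -> double likewise goes through float first. */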
18773 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18774 This simply adds HFmode as a supported mode; even though we don't
18775 implement arithmetic on this type directly, it's supported by
18776 optabs conversions, much the way the double-word arithmetic is
18777 special-cased in the default hook. */
18780 arm_scalar_mode_supported_p (enum machine_mode mode)
18782 if (mode == HFmode)
18783 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
18785 return default_scalar_mode_supported_p (mode);
18788 /* Errors in the source file can cause expand_expr to return const0_rtx
18789 where we expect a vector. To avoid crashing, use one of the vector
18790 clear instructions. */
18792 static rtx
18793 safe_vector_operand (rtx x, enum machine_mode mode)
18794 {
18795 if (x != const0_rtx)
18796 return x;
18797 x = gen_reg_rtx (mode);
18799 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
18800 : gen_rtx_SUBREG (DImode, x, 0)));
18801 return x;
18802 }
18804 /* Subroutine of arm_expand_builtin to take care of binop insns. */
18807 arm_expand_binop_builtin (enum insn_code icode,
18808 tree exp, rtx target)
18811 tree arg0 = CALL_EXPR_ARG (exp, 0);
18812 tree arg1 = CALL_EXPR_ARG (exp, 1);
18813 rtx op0 = expand_normal (arg0);
18814 rtx op1 = expand_normal (arg1);
18815 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18816 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18817 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18819 if (VECTOR_MODE_P (mode0))
18820 op0 = safe_vector_operand (op0, mode0);
18821 if (VECTOR_MODE_P (mode1))
18822 op1 = safe_vector_operand (op1, mode1);
18825 || GET_MODE (target) != tmode
18826 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18827 target = gen_reg_rtx (tmode);
18829 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18832 op0 = copy_to_mode_reg (mode0, op0);
18833 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18834 op1 = copy_to_mode_reg (mode1, op1);
18836 pat = GEN_FCN (icode) (target, op0, op1);
18837 if (! pat)
18838 return 0;
18839 emit_insn (pat);
18840 return target;
18841 }
18843 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18846 arm_expand_unop_builtin (enum insn_code icode,
18847 tree exp, rtx target, int do_load)
18850 tree arg0 = CALL_EXPR_ARG (exp, 0);
18851 rtx op0 = expand_normal (arg0);
18852 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18853 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18856 || GET_MODE (target) != tmode
18857 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18858 target = gen_reg_rtx (tmode);
18860 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18863 if (VECTOR_MODE_P (mode0))
18864 op0 = safe_vector_operand (op0, mode0);
18866 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18867 op0 = copy_to_mode_reg (mode0, op0);
18870 pat = GEN_FCN (icode) (target, op0);
18871 if (! pat)
18872 return 0;
18873 emit_insn (pat);
18874 return target;
18875 }
18878 neon_builtin_compare (const void *a, const void *b)
18880 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18881 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18882 unsigned int soughtcode = key->base_fcode;
18884 if (soughtcode >= memb->base_fcode
18885 && soughtcode < memb->base_fcode + memb->num_vars)
18886 return 0;
18887 else if (soughtcode < memb->base_fcode)
18888 return -1;
18889 else
18890 return 1;
18891 }
18893 static enum insn_code
18894 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18896 neon_builtin_datum key, *found;
18899 key.base_fcode = fcode;
18900 found = (neon_builtin_datum *)
18901 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18902 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18903 gcc_assert (found);
18904 idx = fcode - (int) found->base_fcode;
18905 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18908 *itype = found->itype;
18910 return found->codes[idx];
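/* Illustrative sketch (added commentary, not part of GCC): the comparator
   above implements a "range bsearch" -- each table entry owns the
   half-open interval [base_fcode, base_fcode + num_vars) and compares
   equal to any sought code inside that interval.  A minimal standalone
   analogue, with a hypothetical range_lookup helper: */

#include <stdlib.h>

struct range { unsigned base; unsigned count; };

static int
range_compare (const void *k, const void *m)
{
  const struct range *key = (const struct range *) k;
  const struct range *memb = (const struct range *) m;

  if (key->base >= memb->base && key->base < memb->base + memb->count)
    return 0;			/* KEY falls inside this entry's range.  */
  return key->base < memb->base ? -1 : 1;
}

/* Return the entry of TABLE (N elements, sorted by BASE, ranges disjoint)
   whose range contains CODE, or NULL if there is none.  */
static struct range *
range_lookup (struct range *table, size_t n, unsigned code)
{
  struct range key;
  key.base = code;
  key.count = 0;
  return (struct range *) bsearch (&key, table, n, sizeof *table,
				   range_compare);
}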
18914 NEON_ARG_COPY_TO_REG,
18919 #define NEON_MAX_BUILTIN_ARGS 5
18921 /* Expand a Neon builtin. */
18923 arm_expand_neon_args (rtx target, int icode, int have_retval,
18928 tree arg[NEON_MAX_BUILTIN_ARGS];
18929 rtx op[NEON_MAX_BUILTIN_ARGS];
18930 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18931 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18936 || GET_MODE (target) != tmode
18937 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18938 target = gen_reg_rtx (tmode);
18940 va_start (ap, exp);
18944 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18946 if (thisarg == NEON_ARG_STOP)
18950 arg[argc] = CALL_EXPR_ARG (exp, argc);
18951 op[argc] = expand_normal (arg[argc]);
18952 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18956 case NEON_ARG_COPY_TO_REG:
18957 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18958 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18959 (op[argc], mode[argc]))
18960 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18963 case NEON_ARG_CONSTANT:
18964 /* FIXME: This error message is somewhat unhelpful. */
18965 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18966 (op[argc], mode[argc]))
18967 error ("argument must be a constant");
18970 case NEON_ARG_STOP:
18971 gcc_unreachable ();
18984 pat = GEN_FCN (icode) (target, op[0]);
18988 pat = GEN_FCN (icode) (target, op[0], op[1]);
18992 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18996 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19000 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19004 gcc_unreachable ();
19010 pat = GEN_FCN (icode) (op[0]);
19014 pat = GEN_FCN (icode) (op[0], op[1]);
19018 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19022 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19026 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19030 gcc_unreachable ();
19041 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19042 constants defined per-instruction or per instruction-variant. Instead, the
19043 required info is looked up in the table neon_builtin_data. */
19045 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19048 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19055 return arm_expand_neon_args (target, icode, 1, exp,
19056 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19060 case NEON_SCALARMUL:
19061 case NEON_SCALARMULL:
19062 case NEON_SCALARMULH:
19063 case NEON_SHIFTINSERT:
19064 case NEON_LOGICBINOP:
19065 return arm_expand_neon_args (target, icode, 1, exp,
19066 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19070 return arm_expand_neon_args (target, icode, 1, exp,
19071 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19072 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19076 case NEON_SHIFTIMM:
19077 return arm_expand_neon_args (target, icode, 1, exp,
19078 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19082 return arm_expand_neon_args (target, icode, 1, exp,
19083 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19087 case NEON_REINTERP:
19088 return arm_expand_neon_args (target, icode, 1, exp,
19089 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19093 return arm_expand_neon_args (target, icode, 1, exp,
19094 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19096 case NEON_RESULTPAIR:
19097 return arm_expand_neon_args (target, icode, 0, exp,
19098 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19102 case NEON_LANEMULL:
19103 case NEON_LANEMULH:
19104 return arm_expand_neon_args (target, icode, 1, exp,
19105 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19106 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19109 return arm_expand_neon_args (target, icode, 1, exp,
19110 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19111 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19113 case NEON_SHIFTACC:
19114 return arm_expand_neon_args (target, icode, 1, exp,
19115 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19116 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19118 case NEON_SCALARMAC:
19119 return arm_expand_neon_args (target, icode, 1, exp,
19120 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19121 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19125 return arm_expand_neon_args (target, icode, 1, exp,
19126 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19130 case NEON_LOADSTRUCT:
19131 return arm_expand_neon_args (target, icode, 1, exp,
19132 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19134 case NEON_LOAD1LANE:
19135 case NEON_LOADSTRUCTLANE:
19136 return arm_expand_neon_args (target, icode, 1, exp,
19137 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19141 case NEON_STORESTRUCT:
19142 return arm_expand_neon_args (target, icode, 0, exp,
19143 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19145 case NEON_STORE1LANE:
19146 case NEON_STORESTRUCTLANE:
19147 return arm_expand_neon_args (target, icode, 0, exp,
19148 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19152 gcc_unreachable ();
19155 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19157 neon_reinterpret (rtx dest, rtx src)
19159 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19162 /* Emit code to place a Neon pair result in memory locations (with equal
19165 neon_emit_pair_result_insn (enum machine_mode mode,
19166 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19169 rtx mem = gen_rtx_MEM (mode, destaddr);
19170 rtx tmp1 = gen_reg_rtx (mode);
19171 rtx tmp2 = gen_reg_rtx (mode);
19173 emit_insn (intfn (tmp1, op1, tmp2, op2));
19175 emit_move_insn (mem, tmp1);
19176 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19177 emit_move_insn (mem, tmp2);
19180 /* Set up operands for a register copy from src to dest, taking care not to
19181 clobber registers in the process.
19182 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19183 be called with a large N, so that should be OK. */
19186 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19188 unsigned int copied = 0, opctr = 0;
19189 unsigned int done = (1 << count) - 1;
19192 while (copied != done)
19194 for (i = 0; i < count; i++)
19198 for (j = 0; good && j < count; j++)
19199 if (i != j && (copied & (1 << j)) == 0
19200 && reg_overlap_mentioned_p (src[j], dest[i]))
19205 operands[opctr++] = dest[i];
19206 operands[opctr++] = src[i];
19212 gcc_assert (opctr == count * 2);
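/* Illustrative sketch (added commentary, not part of GCC): the same
   clobber-avoiding ordering as neon_disambiguate_copy, modelled with small
   integer "registers" where overlap is plain equality.  Like the routine
   above, it assumes the copy graph is acyclic (a swap would spin forever). */

static void
order_copies (const int *dest, const int *src, unsigned int count,
	      int *order /* receives 2 * COUNT entries: dst, src, ...  */)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1u << count) - 1;
  unsigned int i, j;

  while (copied != done)
    for (i = 0; i < count; i++)
      {
	int good = 1;

	if (copied & (1u << i))
	  continue;		/* Already scheduled.  */

	/* Refuse to overwrite DEST[i] while a pending copy still reads
	   it as a source.  */
	for (j = 0; good && j < count; j++)
	  if (i != j && (copied & (1u << j)) == 0 && src[j] == dest[i])
	    good = 0;

	if (good)
	  {
	    order[opctr++] = dest[i];
	    order[opctr++] = src[i];
	    copied |= 1u << i;
	  }
      }
}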
19215 /* Expand an expression EXP that calls a built-in function,
19216 with result going to TARGET if that's convenient
19217 (and in mode MODE if that's convenient).
19218 SUBTARGET may be used as the target for computing one of EXP's operands.
19219 IGNORE is nonzero if the value is to be ignored. */
19222 arm_expand_builtin (tree exp,
19224 rtx subtarget ATTRIBUTE_UNUSED,
19225 enum machine_mode mode ATTRIBUTE_UNUSED,
19226 int ignore ATTRIBUTE_UNUSED)
19228 const struct builtin_description * d;
19229 enum insn_code icode;
19230 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19238 int fcode = DECL_FUNCTION_CODE (fndecl);
19240 enum machine_mode tmode;
19241 enum machine_mode mode0;
19242 enum machine_mode mode1;
19243 enum machine_mode mode2;
19245 if (fcode >= ARM_BUILTIN_NEON_BASE)
19246 return arm_expand_neon_builtin (fcode, exp, target);
19250 case ARM_BUILTIN_TEXTRMSB:
19251 case ARM_BUILTIN_TEXTRMUB:
19252 case ARM_BUILTIN_TEXTRMSH:
19253 case ARM_BUILTIN_TEXTRMUH:
19254 case ARM_BUILTIN_TEXTRMSW:
19255 case ARM_BUILTIN_TEXTRMUW:
19256 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19257 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19258 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19259 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19260 : CODE_FOR_iwmmxt_textrmw);
19262 arg0 = CALL_EXPR_ARG (exp, 0);
19263 arg1 = CALL_EXPR_ARG (exp, 1);
19264 op0 = expand_normal (arg0);
19265 op1 = expand_normal (arg1);
19266 tmode = insn_data[icode].operand[0].mode;
19267 mode0 = insn_data[icode].operand[1].mode;
19268 mode1 = insn_data[icode].operand[2].mode;
19270 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19271 op0 = copy_to_mode_reg (mode0, op0);
19272 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19274 /* @@@ better error message */
19275 error ("selector must be an immediate");
19276 return gen_reg_rtx (tmode);
19279 || GET_MODE (target) != tmode
19280 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19281 target = gen_reg_rtx (tmode);
19282 pat = GEN_FCN (icode) (target, op0, op1);
19288 case ARM_BUILTIN_TINSRB:
19289 case ARM_BUILTIN_TINSRH:
19290 case ARM_BUILTIN_TINSRW:
19291 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19292 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19293 : CODE_FOR_iwmmxt_tinsrw);
19294 arg0 = CALL_EXPR_ARG (exp, 0);
19295 arg1 = CALL_EXPR_ARG (exp, 1);
19296 arg2 = CALL_EXPR_ARG (exp, 2);
19297 op0 = expand_normal (arg0);
19298 op1 = expand_normal (arg1);
19299 op2 = expand_normal (arg2);
19300 tmode = insn_data[icode].operand[0].mode;
19301 mode0 = insn_data[icode].operand[1].mode;
19302 mode1 = insn_data[icode].operand[2].mode;
19303 mode2 = insn_data[icode].operand[3].mode;
19305 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19306 op0 = copy_to_mode_reg (mode0, op0);
19307 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19308 op1 = copy_to_mode_reg (mode1, op1);
19309 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19311 /* @@@ better error message */
19312 error ("selector must be an immediate");
19316 || GET_MODE (target) != tmode
19317 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19318 target = gen_reg_rtx (tmode);
19319 pat = GEN_FCN (icode) (target, op0, op1, op2);
19325 case ARM_BUILTIN_SETWCX:
19326 arg0 = CALL_EXPR_ARG (exp, 0);
19327 arg1 = CALL_EXPR_ARG (exp, 1);
19328 op0 = force_reg (SImode, expand_normal (arg0));
19329 op1 = expand_normal (arg1);
19330 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19333 case ARM_BUILTIN_GETWCX:
19334 arg0 = CALL_EXPR_ARG (exp, 0);
19335 op0 = expand_normal (arg0);
19336 target = gen_reg_rtx (SImode);
19337 emit_insn (gen_iwmmxt_tmrc (target, op0));
19340 case ARM_BUILTIN_WSHUFH:
19341 icode = CODE_FOR_iwmmxt_wshufh;
19342 arg0 = CALL_EXPR_ARG (exp, 0);
19343 arg1 = CALL_EXPR_ARG (exp, 1);
19344 op0 = expand_normal (arg0);
19345 op1 = expand_normal (arg1);
19346 tmode = insn_data[icode].operand[0].mode;
19347 mode1 = insn_data[icode].operand[1].mode;
19348 mode2 = insn_data[icode].operand[2].mode;
19350 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19351 op0 = copy_to_mode_reg (mode1, op0);
19352 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19354 /* @@@ better error message */
19355 error ("mask must be an immediate");
19359 || GET_MODE (target) != tmode
19360 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19361 target = gen_reg_rtx (tmode);
19362 pat = GEN_FCN (icode) (target, op0, op1);
19368 case ARM_BUILTIN_WSADB:
19369 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19370 case ARM_BUILTIN_WSADH:
19371 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19372 case ARM_BUILTIN_WSADBZ:
19373 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19374 case ARM_BUILTIN_WSADHZ:
19375 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19377 /* Several three-argument builtins. */
19378 case ARM_BUILTIN_WMACS:
19379 case ARM_BUILTIN_WMACU:
19380 case ARM_BUILTIN_WALIGN:
19381 case ARM_BUILTIN_TMIA:
19382 case ARM_BUILTIN_TMIAPH:
19383 case ARM_BUILTIN_TMIATT:
19384 case ARM_BUILTIN_TMIATB:
19385 case ARM_BUILTIN_TMIABT:
19386 case ARM_BUILTIN_TMIABB:
19387 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19388 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19389 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19390 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19391 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19392 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19393 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19394 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19395 : CODE_FOR_iwmmxt_walign);
19396 arg0 = CALL_EXPR_ARG (exp, 0);
19397 arg1 = CALL_EXPR_ARG (exp, 1);
19398 arg2 = CALL_EXPR_ARG (exp, 2);
19399 op0 = expand_normal (arg0);
19400 op1 = expand_normal (arg1);
19401 op2 = expand_normal (arg2);
19402 tmode = insn_data[icode].operand[0].mode;
19403 mode0 = insn_data[icode].operand[1].mode;
19404 mode1 = insn_data[icode].operand[2].mode;
19405 mode2 = insn_data[icode].operand[3].mode;
19407 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19408 op0 = copy_to_mode_reg (mode0, op0);
19409 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19410 op1 = copy_to_mode_reg (mode1, op1);
19411 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19412 op2 = copy_to_mode_reg (mode2, op2);
19414 || GET_MODE (target) != tmode
19415 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19416 target = gen_reg_rtx (tmode);
19417 pat = GEN_FCN (icode) (target, op0, op1, op2);
19423 case ARM_BUILTIN_WZERO:
19424 target = gen_reg_rtx (DImode);
19425 emit_insn (gen_iwmmxt_clrdi (target));
19428 case ARM_BUILTIN_THREAD_POINTER:
19429 return arm_load_tp (target);
19435 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19436 if (d->code == (const enum arm_builtins) fcode)
19437 return arm_expand_binop_builtin (d->icode, exp, target);
19439 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19440 if (d->code == (const enum arm_builtins) fcode)
19441 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19443 /* @@@ Should really do something sensible here. */
19447 /* Return the number (counting from 0) of
19448 the least significant set bit in MASK. */
19450 static int
19451 number_of_first_bit_set (unsigned mask)
19452 {
19453 int bit;
19455 for (bit = 0;
19456 (mask & (1 << bit)) == 0;
19457 ++bit)
19458 continue;
19460 return bit;
19461 }
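/* Added note (not in the original source): for a nonzero MASK this is
   equivalent to

     return __builtin_ctz (mask);

   the explicit loop merely avoids relying on that GCC builtin; callers
   here are expected to pass a mask with at least one bit set, since the
   loop would otherwise run off the end of the word. */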
19463 /* Emit code to push or pop registers to or from the stack. F is the
19464 assembly file. MASK is the registers to push or pop. PUSH is
19465 nonzero if we should push, and zero if we should pop. For debugging
19466 output, if pushing, adjust CFA_OFFSET by the amount of space added
19467 to the stack. REAL_REGS should have the same number of bits set as
19468 MASK, and will be used instead (in the same order) to describe which
19469 registers were saved - this is used to mark the save slots when we
19470 push high registers after moving them to low registers. */
19472 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19473 unsigned long real_regs)
19476 int lo_mask = mask & 0xFF;
19477 int pushed_words = 0;
19481 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19483 /* Special case. Do not generate a POP PC statement here; do it in
19484 thumb_exit instead. */
19485 thumb_exit (f, -1);
19489 if (ARM_EABI_UNWIND_TABLES && push)
19491 fprintf (f, "\t.save\t{");
19492 for (regno = 0; regno < 15; regno++)
19494 if (real_regs & (1 << regno))
19496 if (real_regs & ((1 << regno) -1))
19498 asm_fprintf (f, "%r", regno);
19501 fprintf (f, "}\n");
19504 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19506 /* Look at the low registers first. */
19507 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19511 asm_fprintf (f, "%r", regno);
19513 if ((lo_mask & ~1) != 0)
19520 if (push && (mask & (1 << LR_REGNUM)))
19522 /* Catch pushing the LR. */
19526 asm_fprintf (f, "%r", LR_REGNUM);
19530 else if (!push && (mask & (1 << PC_REGNUM)))
19532 /* Catch popping the PC. */
19533 if (TARGET_INTERWORK || TARGET_BACKTRACE
19534 || crtl->calls_eh_return)
19536 /* The PC is never popped directly; instead
19537 it is popped into r3 and then BX is used. */
19538 fprintf (f, "}\n");
19540 thumb_exit (f, -1);
19549 asm_fprintf (f, "%r", PC_REGNUM);
19553 fprintf (f, "}\n");
19555 if (push && pushed_words && dwarf2out_do_frame ())
19557 char *l = dwarf2out_cfi_label (false);
19558 int pushed_mask = real_regs;
19560 *cfa_offset += pushed_words * 4;
19561 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19563 pushed_words = 0;
19564 pushed_mask = real_regs;
19565 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19567 if (pushed_mask & 1)
19568 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
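/* Illustrative output (added commentary, not in the original source): for
   a push of {r0, r1, lr} the function emits text along the lines of

     .save {r0, r1, lr}        (only when ARM_EABI_UNWIND_TABLES is set)
     push {r0, r1, lr}

   and, when debug frame info is enabled, follows up with CFA notes
   recording the 12 bytes added to the stack and each register's slot. */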
19573 /* Generate code to return from a thumb function.
19574 If 'reg_containing_return_addr' is -1, then the return address is
19575 actually on the stack, at the stack pointer. */
19577 thumb_exit (FILE *f, int reg_containing_return_addr)
19579 unsigned regs_available_for_popping;
19580 unsigned regs_to_pop;
19582 unsigned available;
19586 int restore_a4 = FALSE;
19588 /* Compute the registers we need to pop. */
19592 if (reg_containing_return_addr == -1)
19594 regs_to_pop |= 1 << LR_REGNUM;
19598 if (TARGET_BACKTRACE)
19600 /* Restore the (ARM) frame pointer and stack pointer. */
19601 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
19605 /* If there is nothing to pop then just emit the BX instruction and
19606 return. */
19607 if (pops_needed == 0)
19609 if (crtl->calls_eh_return)
19610 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19612 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19615 /* Otherwise, if we are not supporting interworking, have not created
19616 a backtrace structure, and the function was not entered in ARM mode,
19617 then just pop the return address straight into the PC. */
19618 else if (!TARGET_INTERWORK
19619 && !TARGET_BACKTRACE
19620 && !is_called_in_ARM_mode (current_function_decl)
19621 && !crtl->calls_eh_return)
19623 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
19627 /* Find out how many of the (return) argument registers we can corrupt. */
19628 regs_available_for_popping = 0;
19630 /* If returning via __builtin_eh_return, the bottom three registers
19631 all contain information needed for the return. */
19632 if (crtl->calls_eh_return)
19636 /* Otherwise, deduce the registers used from the function's
19637 return value. This is more reliable than examining
19638 df_regs_ever_live_p () because that will be set if the register is
19639 ever used in the function, not just if the register is used
19640 to hold a return value. */
19642 if (crtl->return_rtx != 0)
19643 mode = GET_MODE (crtl->return_rtx);
19645 mode = DECL_MODE (DECL_RESULT (current_function_decl));
19647 size = GET_MODE_SIZE (mode);
19651 /* In a void function we can use any argument register.
19652 In a function that returns a structure on the stack
19653 we can use the second and third argument registers. */
19654 if (mode == VOIDmode)
19655 regs_available_for_popping =
19656 (1 << ARG_REGISTER (1))
19657 | (1 << ARG_REGISTER (2))
19658 | (1 << ARG_REGISTER (3));
19660 regs_available_for_popping =
19661 (1 << ARG_REGISTER (2))
19662 | (1 << ARG_REGISTER (3));
19664 else if (size <= 4)
19665 regs_available_for_popping =
19666 (1 << ARG_REGISTER (2))
19667 | (1 << ARG_REGISTER (3));
19668 else if (size <= 8)
19669 regs_available_for_popping =
19670 (1 << ARG_REGISTER (3));
19673 /* Match registers to be popped with registers into which we pop them. */
19674 for (available = regs_available_for_popping,
19675 required = regs_to_pop;
19676 required != 0 && available != 0;
19677 available &= ~(available & - available),
19678 required &= ~(required & - required))
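    /* Each iteration above clears the least significant set bit of both
       masks (X &= ~(X & -X) turns e.g. 0b0110 into 0b0100), so the loop
       in effect pairs each required register with an available one,
       lowest numbered first.  */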
19681 /* If we have any popping registers left over, remove them. */
19683 regs_available_for_popping &= ~available;
19685 /* Otherwise if we need another popping register we can use
19686 the fourth argument register. */
19687 else if (pops_needed)
19689 /* If we have not found any free argument registers and
19690 reg a4 contains the return address, we must move it. */
19691 if (regs_available_for_popping == 0
19692 && reg_containing_return_addr == LAST_ARG_REGNUM)
19694 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19695 reg_containing_return_addr = LR_REGNUM;
19697 else if (size > 12)
19699 /* Register a4 is being used to hold part of the return value,
19700 but we have dire need of a free, low register. */
19703 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
19706 if (reg_containing_return_addr != LAST_ARG_REGNUM)
19708 /* The fourth argument register is available. */
19709 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
19715 /* Pop as many registers as we can. */
19716 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19717 regs_available_for_popping);
19719 /* Process the registers we popped. */
19720 if (reg_containing_return_addr == -1)
19722 /* The return address was popped into the lowest numbered register. */
19723 regs_to_pop &= ~(1 << LR_REGNUM);
19725 reg_containing_return_addr =
19726 number_of_first_bit_set (regs_available_for_popping);
19728 /* Remove this register from the mask of available registers, so that
19729 the return address will not be corrupted by further pops. */
19730 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
19733 /* If we popped other registers then handle them here. */
19734 if (regs_available_for_popping)
19738 /* Work out which register currently contains the frame pointer. */
19739 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
19741 /* Move it into the correct place. */
19742 asm_fprintf (f, "\tmov\t%r, %r\n",
19743 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
19745 /* (Temporarily) remove it from the mask of popped registers. */
19746 regs_available_for_popping &= ~(1 << frame_pointer);
19747 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
19749 if (regs_available_for_popping)
19753 /* We popped the stack pointer as well,
19754 find the register that contains it. */
19755 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
19757 /* Move it into the stack register. */
19758 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
19760 /* At this point we have popped all necessary registers, so
19761 do not worry about restoring regs_available_for_popping
19762 to its correct value:
19764 assert (pops_needed == 0)
19765 assert (regs_available_for_popping == (1 << frame_pointer))
19766 assert (regs_to_pop == (1 << STACK_POINTER)) */
19770 /* Since we have just moved the popped value into the frame
19771 pointer, the popping register is available for reuse, and
19772 we know that we still have the stack pointer left to pop. */
19773 regs_available_for_popping |= (1 << frame_pointer);
19777 /* If we still have registers left on the stack, but we no longer have
19778 any registers into which we can pop them, then we must move the return
19779 address into the link register and make available the register that contained it. */
19781 if (regs_available_for_popping == 0 && pops_needed > 0)
19783 regs_available_for_popping |= 1 << reg_containing_return_addr;
19785 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
19786 reg_containing_return_addr);
19788 reg_containing_return_addr = LR_REGNUM;
19791 /* If we have registers left on the stack then pop some more.
19792 We know that at most we will want to pop FP and SP. */
19793 if (pops_needed > 0)
19798 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19799 regs_available_for_popping);
19801 /* We have popped either FP or SP.
19802 Move whichever one it is into the correct register. */
19803 popped_into = number_of_first_bit_set (regs_available_for_popping);
19804 move_to = number_of_first_bit_set (regs_to_pop);
19806 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
19808 regs_to_pop &= ~(1 << move_to);
19813 /* If we still have not popped everything then we must have only
19814 had one register available to us and we are now popping the SP. */
19815 if (pops_needed > 0)
19819 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19820 regs_available_for_popping);
19822 popped_into = number_of_first_bit_set (regs_available_for_popping);
19824 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19826 assert (regs_to_pop == (1 << STACK_POINTER))
19827 assert (pops_needed == 1)
19831 /* If necessary restore the a4 register. */
19834 if (reg_containing_return_addr != LR_REGNUM)
19836 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19837 reg_containing_return_addr = LR_REGNUM;
19840 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19843 if (crtl->calls_eh_return)
19844 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19846 /* Return to caller. */
19847 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19850 /* Scan INSN just before assembler is output for it.
19851 For Thumb-1, we track the status of the condition codes; this
19852 information is used in the cbranchsi4_insn pattern. */
19854 thumb1_final_prescan_insn (rtx insn)
19856 if (flag_print_asm_name)
19857 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19858 INSN_ADDRESSES (INSN_UID (insn)));
19859 /* Don't overwrite the previous setter when we get to a cbranch. */
19860 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19862 enum attr_conds conds;
19864 if (cfun->machine->thumb1_cc_insn)
19866 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
19867 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
19870 conds = get_attr_conds (insn);
19871 if (conds == CONDS_SET)
19873 rtx set = single_set (insn);
19874 cfun->machine->thumb1_cc_insn = insn;
19875 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
19876 cfun->machine->thumb1_cc_op1 = const0_rtx;
19877 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
19878 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
19880 rtx src1 = XEXP (SET_SRC (set), 1);
19881 if (src1 == const0_rtx)
19882 cfun->machine->thumb1_cc_mode = CCmode;
19885 else if (conds != CONDS_NOCOND)
19886 cfun->machine->thumb1_cc_insn = NULL_RTX;
19891 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19893 unsigned HOST_WIDE_INT mask = 0xff;
19896 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19897 if (val == 0) /* XXX */
19900 for (i = 0; i < 25; i++)
19901 if ((val & (mask << i)) == val)
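/* A self-contained illustration (an editorial sketch, not part of the
   original source): the test above accepts exactly those constants that
   are an 8-bit value shifted left by 0 to 24 bits, i.e. constants a
   Thumb MOV immediate plus LSL pair can materialize.  */
static int
example_thumb_shiftable_const_p (unsigned long val)
{
  int i;

  val &= 0xffffffffUL;
  if (val == 0)
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (0xffUL << i)) == val)
      return 1;		/* e.g. 0x1FE00 == 0xFF << 9.  */

  return 0;
}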
19907 /* Returns nonzero if the current function contains,
19908 or might contain a far jump. */
19910 thumb_far_jump_used_p (void)
19914 /* This test is only important for leaf functions. */
19915 /* assert (!leaf_function_p ()); */
19917 /* If we have already decided that far jumps may be used,
19918 do not bother checking again, and always return true even if
19919 it turns out that they are not being used. Once we have made
19920 the decision that far jumps are present (and that hence the link
19921 register will be pushed onto the stack) we cannot go back on it. */
19922 if (cfun->machine->far_jump_used)
19925 /* If this function is not being called from the prologue/epilogue
19926 generation code then it must be being called from the
19927 INITIAL_ELIMINATION_OFFSET macro. */
19928 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19930 /* In this case we know that we are being asked about the elimination
19931 of the arg pointer register. If that register is not being used,
19932 then there are no arguments on the stack, and we do not have to
19933 worry that a far jump might force the prologue to push the link
19934 register, changing the stack offsets. In this case we can just
19935 return false, since the presence of far jumps in the function will
19936 not affect stack offsets.
19938 If the arg pointer is live (or if it was live, but has now been
19939 eliminated and so set to dead) then we do have to test to see if
19940 the function might contain a far jump. This test can lead to some
19941 false negatives, since before reload is completed, the length of
19942 branch instructions is not known, so gcc defaults to returning their
19943 longest length, which in turn sets the far jump attribute to true.
19945 A false negative will not result in bad code being generated, but it
19946 will result in a needless push and pop of the link register. We
19947 hope that this does not occur too often.
19949 If we need doubleword stack alignment this could affect the other
19950 elimination offsets so we can't risk getting it wrong. */
19951 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19952 cfun->machine->arg_pointer_live = 1;
19953 else if (!cfun->machine->arg_pointer_live)
19957 /* Check to see if the function contains a branch
19958 insn with the far jump attribute set. */
19959 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19961 if (GET_CODE (insn) == JUMP_INSN
19962 /* Ignore tablejump patterns. */
19963 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19964 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19965 && get_attr_far_jump (insn) == FAR_JUMP_YES
19968 /* Record the fact that we have decided that
19969 the function does use far jumps. */
19970 cfun->machine->far_jump_used = 1;
19978 /* Return nonzero if FUNC must be entered in ARM mode. */
19980 is_called_in_ARM_mode (tree func)
19982 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19984 /* Ignore the problem about functions whose address is taken. */
19985 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19989 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19995 /* Given the stack offsets and register mask in OFFSETS, decide how
19996 many additional registers to push instead of subtracting a constant
19997 from SP. For epilogues the principle is the same except we use pop.
19998 FOR_PROLOGUE indicates which we're generating. */
20000 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20002 HOST_WIDE_INT amount;
20003 unsigned long live_regs_mask = offsets->saved_regs_mask;
20004 /* Extract a mask of the ones we can give to the Thumb's push/pop
20006 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20007 /* Then count how many other high registers will need to be pushed. */
20008 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20009 int n_free, reg_base;
20011 if (!for_prologue && frame_pointer_needed)
20012 amount = offsets->locals_base - offsets->saved_regs;
20014 amount = offsets->outgoing_args - offsets->saved_regs;
20016 /* If the stack frame size is 512 exactly, we can save one load
20017 instruction, which should make this a win even when optimizing
20019 if (!optimize_size && amount != 512)
20022 /* Can't do this if there are high registers to push. */
20023 if (high_regs_pushed != 0)
20026 /* Shouldn't do it in the prologue if no registers would normally
20027 be pushed at all. In the epilogue, also allow it if we'll have
20028 a pop insn for the PC. */
20031 || TARGET_BACKTRACE
20032 || (live_regs_mask & 1 << LR_REGNUM) == 0
20033 || TARGET_INTERWORK
20034 || crtl->args.pretend_args_size != 0))
20037 /* Don't do this if thumb_expand_prologue wants to emit instructions
20038 between the push and the stack frame allocation. */
20040 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20041 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20048 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20049 live_regs_mask >>= reg_base;
20052 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20053 && (for_prologue || call_used_regs[reg_base + n_free]))
20055 live_regs_mask >>= 1;
20061 gcc_assert (amount / 4 * 4 == amount);
20063 if (amount >= 512 && (amount - n_free * 4) < 512)
20064 return (amount - 508) / 4;
20065 if (amount <= n_free * 4)
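/* Worked example (an editorial sketch; the final branch mirrors the
   elided fall-through, so treat it as an assumption).  With
   AMOUNT == 516 and N_FREE == 3, pushing (516 - 508) / 4 == 2 extra
   registers leaves a 508-byte adjustment, the largest that a single
   Thumb "sub sp, #imm" can perform.  */
static int
example_extra_pushes (int amount, int n_free)
{
  if (amount >= 512 && amount - n_free * 4 < 512)
    return (amount - 508) / 4;	/* Just enough to reach the SUB range.  */
  if (amount <= n_free * 4)
    return amount / 4;		/* Absorb the whole adjustment in pushes.  */
  return 0;
}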
20070 /* The bits which aren't usefully expanded as rtl. */
20072 thumb_unexpanded_epilogue (void)
20074 arm_stack_offsets *offsets;
20076 unsigned long live_regs_mask = 0;
20077 int high_regs_pushed = 0;
20079 int had_to_push_lr;
20082 if (cfun->machine->return_used_this_function != 0)
20085 if (IS_NAKED (arm_current_func_type ()))
20088 offsets = arm_get_frame_offsets ();
20089 live_regs_mask = offsets->saved_regs_mask;
20090 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20092 /* Deduce the registers used from the function's return value.
20093 This is more reliable than examining df_regs_ever_live_p () because that
20094 will be set if the register is ever used in the function, not just if
20095 the register is used to hold a return value. */
20096 size = arm_size_return_regs ();
20098 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20101 unsigned long extra_mask = (1 << extra_pop) - 1;
20102 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20105 /* The prolog may have pushed some high registers to use as
20106 work registers; e.g. the testsuite file:
20107 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20108 compiles to produce:
20109 push {r4, r5, r6, r7, lr}
20113 as part of the prolog. We have to undo that pushing here. */
20115 if (high_regs_pushed)
20117 unsigned long mask = live_regs_mask & 0xff;
20120 /* The available low registers depend on the size of the value we are returning. */
20128 /* Oh dear! We have no low registers into which we can pop
20131 ("no low registers available for popping high registers");
20133 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20134 if (live_regs_mask & (1 << next_hi_reg))
20137 while (high_regs_pushed)
20139 /* Find lo register(s) into which the high register(s) can
20141 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20143 if (mask & (1 << regno))
20144 high_regs_pushed--;
20145 if (high_regs_pushed == 0)
20149 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20151 /* Pop the values into the low register(s). */
20152 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20154 /* Move the value(s) into the high registers. */
20155 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20157 if (mask & (1 << regno))
20159 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20162 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20163 if (live_regs_mask & (1 << next_hi_reg))
20168 live_regs_mask &= ~0x0f00;
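  /* For example (illustrative): if r8 and r9 were saved via low
     registers in the prologue, the loop above restores them with a
     sequence along the lines of

	pop	{r4, r5}
	mov	r8, r4
	mov	r9, r5

     before the ordinary low-register pop below.  */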
20171 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20172 live_regs_mask &= 0xff;
20174 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20176 /* Pop the return address into the PC. */
20177 if (had_to_push_lr)
20178 live_regs_mask |= 1 << PC_REGNUM;
20180 /* Either no argument registers were pushed or a backtrace
20181 structure was created which includes an adjusted stack
20182 pointer, so just pop everything. */
20183 if (live_regs_mask)
20184 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20187 /* We have either just popped the return address into the
20188 PC or it was kept in LR for the entire function.
20189 Note that thumb_pushpop has already called thumb_exit if the
20190 PC was in the list. */
20191 if (!had_to_push_lr)
20192 thumb_exit (asm_out_file, LR_REGNUM);
20196 /* Pop everything but the return address. */
20197 if (live_regs_mask)
20198 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20201 if (had_to_push_lr)
20205 /* We have no free low regs, so save one. */
20206 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20210 /* Get the return address into a temporary register. */
20211 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20212 1 << LAST_ARG_REGNUM);
20216 /* Move the return address to lr. */
20217 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20219 /* Restore the low register. */
20220 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20225 regno = LAST_ARG_REGNUM;
20230 /* Remove the argument registers that were pushed onto the stack. */
20231 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20232 SP_REGNUM, SP_REGNUM,
20233 crtl->args.pretend_args_size);
20235 thumb_exit (asm_out_file, regno);
20241 /* Functions to save and restore machine-specific function data. */
20242 static struct machine_function *
20243 arm_init_machine_status (void)
20245 struct machine_function *machine;
20246 machine = ggc_alloc_cleared_machine_function ();
20248 #if ARM_FT_UNKNOWN != 0
20249 machine->func_type = ARM_FT_UNKNOWN;
20254 /* Return an RTX indicating where the return address to the
20255 calling function can be found. */
20257 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20262 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20265 /* Do anything needed before RTL is emitted for each function. */
20267 arm_init_expanders (void)
20269 /* Arrange to initialize and mark the machine per-function status. */
20270 init_machine_status = arm_init_machine_status;
20272 /* This is to stop the combine pass optimizing away the alignment
20273 adjustment of va_arg. */
20274 /* ??? It is claimed that this should not be necessary. */
20276 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20280 /* Like arm_compute_initial_elimination offset. Simpler because there
20281 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20282 to point at the base of the local variables after static stack
20283 space for a function has been allocated. */
20286 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20288 arm_stack_offsets *offsets;
20290 offsets = arm_get_frame_offsets ();
20294 case ARG_POINTER_REGNUM:
20297 case STACK_POINTER_REGNUM:
20298 return offsets->outgoing_args - offsets->saved_args;
20300 case FRAME_POINTER_REGNUM:
20301 return offsets->soft_frame - offsets->saved_args;
20303 case ARM_HARD_FRAME_POINTER_REGNUM:
20304 return offsets->saved_regs - offsets->saved_args;
20306 case THUMB_HARD_FRAME_POINTER_REGNUM:
20307 return offsets->locals_base - offsets->saved_args;
20310 gcc_unreachable ();
20314 case FRAME_POINTER_REGNUM:
20317 case STACK_POINTER_REGNUM:
20318 return offsets->outgoing_args - offsets->soft_frame;
20320 case ARM_HARD_FRAME_POINTER_REGNUM:
20321 return offsets->saved_regs - offsets->soft_frame;
20323 case THUMB_HARD_FRAME_POINTER_REGNUM:
20324 return offsets->locals_base - offsets->soft_frame;
20327 gcc_unreachable ();
20332 gcc_unreachable ();
20336 /* Generate the rest of a function's prologue. */
20338 thumb1_expand_prologue (void)
20342 HOST_WIDE_INT amount;
20343 arm_stack_offsets *offsets;
20344 unsigned long func_type;
20346 unsigned long live_regs_mask;
20348 func_type = arm_current_func_type ();
20350 /* Naked functions don't have prologues. */
20351 if (IS_NAKED (func_type))
20354 if (IS_INTERRUPT (func_type))
20356 error ("interrupt Service Routines cannot be coded in Thumb mode");
20360 offsets = arm_get_frame_offsets ();
20361 live_regs_mask = offsets->saved_regs_mask;
20362 /* Load the pic register before setting the frame pointer,
20363 so we can use r7 as a temporary work register. */
20364 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20365 arm_load_pic_register (live_regs_mask);
20367 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20368 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20369 stack_pointer_rtx);
20371 amount = offsets->outgoing_args - offsets->saved_regs;
20372 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20377 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20378 GEN_INT (- amount)));
20379 RTX_FRAME_RELATED_P (insn) = 1;
20385 /* The stack decrement is too big for an immediate value in a single
20386 insn. In theory we could issue multiple subtracts, but after
20387 three of them it becomes more space efficient to place the full
20388 value in the constant pool and load into a register. (Also the
20389 ARM debugger really likes to see only one stack decrement per
20390 function). So instead we look for a scratch register into which
20391 we can load the decrement, and then we subtract this from the
20392 stack pointer. Unfortunately on the thumb the only available
20393 scratch registers are the argument registers, and we cannot use
20394 these as they may hold arguments to the function. Instead we
20395 attempt to locate a call preserved register which is used by this
20396 function. If we can find one, then we know that it will have
20397 been pushed at the start of the prologue and so we can corrupt
20399 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20400 if (live_regs_mask & (1 << regno))
20403 gcc_assert (regno <= LAST_LO_REGNUM);
20405 reg = gen_rtx_REG (SImode, regno);
20407 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20409 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20410 stack_pointer_rtx, reg));
20411 RTX_FRAME_RELATED_P (insn) = 1;
20412 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20413 plus_constant (stack_pointer_rtx,
20415 RTX_FRAME_RELATED_P (dwarf) = 1;
20416 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
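      /* Illustrative result (assuming r4 is among the saved registers
	 and AMOUNT is 4096): the movsi above becomes a literal-pool
	 load, giving

		ldr	r4, .Lc		@ .Lc: .word -4096
		add	sp, sp, r4

	 r4's real value is restored from its save slot in the
	 epilogue.  */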
20420 if (frame_pointer_needed)
20421 thumb_set_frame_pointer (offsets);
20423 /* If we are profiling, make sure no instructions are scheduled before
20424 the call to mcount. Similarly if the user has requested no
20425 scheduling in the prolog. Similarly if we want non-call exceptions
20426 using the EABI unwinder, to prevent faulting instructions from being
20427 swapped with a stack adjustment. */
20428 if (crtl->profile || !TARGET_SCHED_PROLOG
20429 || (ARM_EABI_UNWIND_TABLES && cfun->can_throw_non_call_exceptions))
20430 emit_insn (gen_blockage ());
20432 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20433 if (live_regs_mask & 0xff)
20434 cfun->machine->lr_save_eliminated = 0;
20439 thumb1_expand_epilogue (void)
20441 HOST_WIDE_INT amount;
20442 arm_stack_offsets *offsets;
20445 /* Naked functions don't have epilogues either. */
20446 if (IS_NAKED (arm_current_func_type ()))
20449 offsets = arm_get_frame_offsets ();
20450 amount = offsets->outgoing_args - offsets->saved_regs;
20452 if (frame_pointer_needed)
20454 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20455 amount = offsets->locals_base - offsets->saved_regs;
20457 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20459 gcc_assert (amount >= 0);
20463 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20464 GEN_INT (amount)));
20467 /* r3 is always free in the epilogue. */
20468 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20470 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20471 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20475 /* Emit a USE (stack_pointer_rtx), so that
20476 the stack adjustment will not be deleted. */
20477 emit_insn (gen_prologue_use (stack_pointer_rtx));
20479 if (crtl->profile || !TARGET_SCHED_PROLOG)
20480 emit_insn (gen_blockage ());
20482 /* Emit a clobber for each insn that will be restored in the epilogue,
20483 so that flow2 will get register lifetimes correct. */
20484 for (regno = 0; regno < 13; regno++)
20485 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20486 emit_clobber (gen_rtx_REG (SImode, regno));
20488 if (! df_regs_ever_live_p (LR_REGNUM))
20489 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20493 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20495 arm_stack_offsets *offsets;
20496 unsigned long live_regs_mask = 0;
20497 unsigned long l_mask;
20498 unsigned high_regs_pushed = 0;
20499 int cfa_offset = 0;
20502 if (IS_NAKED (arm_current_func_type ()))
20505 if (is_called_in_ARM_mode (current_function_decl))
20509 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20510 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20512 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20514 /* Generate code sequence to switch us into Thumb mode. */
20515 /* The .code 32 directive has already been emitted by
20516 ASM_DECLARE_FUNCTION_NAME. */
20517 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20518 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20520 /* Generate a label, so that the debugger will notice the
20521 change in instruction sets. This label is also used by
20522 the assembler to bypass the ARM code when this function
20523 is called from a Thumb encoded function elsewhere in the
20524 same file. Hence the definition of STUB_NAME here must
20525 agree with the definition in gas/config/tc-arm.c. */
20527 #define STUB_NAME ".real_start_of"
20529 fprintf (f, "\t.code\t16\n");
20531 if (arm_dllexport_name_p (name))
20532 name = arm_strip_name_encoding (name);
20534 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20535 fprintf (f, "\t.thumb_func\n");
20536 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
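      /* The emitted entry sequence therefore looks like (illustrative;
	 "foo" is a stand-in name):

		.code	32
	foo:	orr	ip, pc, #1	@ Thumb address of the code below
		bx	ip		@ switch to Thumb state
		.code	16
		.thumb_func
	.real_start_of_foo:

	 so that callers in ARM state can enter the function directly.  */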
20539 if (crtl->args.pretend_args_size)
20541 /* Output unwind directive for the stack adjustment. */
20542 if (ARM_EABI_UNWIND_TABLES)
20543 fprintf (f, "\t.pad #%d\n",
20544 crtl->args.pretend_args_size);
20546 if (cfun->machine->uses_anonymous_args)
20550 fprintf (f, "\tpush\t{");
20552 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20554 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20555 regno <= LAST_ARG_REGNUM;
20557 asm_fprintf (f, "%r%s", regno,
20558 regno == LAST_ARG_REGNUM ? "" : ", ");
20560 fprintf (f, "}\n");
20563 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20564 SP_REGNUM, SP_REGNUM,
20565 crtl->args.pretend_args_size);
20567 /* We don't need to record the stores for unwinding (would it
20568 help the debugger any if we did?), but record the change in
20569 the stack pointer. */
20570 if (dwarf2out_do_frame ())
20572 char *l = dwarf2out_cfi_label (false);
20574 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
20575 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20579 /* Get the registers we are going to push. */
20580 offsets = arm_get_frame_offsets ();
20581 live_regs_mask = offsets->saved_regs_mask;
20582 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20583 l_mask = live_regs_mask & 0x40ff;
20584 /* Then count how many other high registers will need to be pushed. */
20585 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20587 if (TARGET_BACKTRACE)
20590 unsigned work_register;
20592 /* We have been asked to create a stack backtrace structure.
20593 The code looks like this:
20597 0 sub SP, #16 Reserve space for 4 registers.
20598 2 push {R7} Push low registers.
20599 4 add R7, SP, #20 Get the stack pointer before the push.
20600 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20601 8 mov R7, PC Get hold of the start of this code plus 12.
20602 10 str R7, [SP, #16] Store it.
20603 12 mov R7, FP Get hold of the current frame pointer.
20604 14 str R7, [SP, #4] Store it.
20605 16 mov R7, LR Get hold of the current return address.
20606 18 str R7, [SP, #12] Store it.
20607 20 add R7, SP, #16 Point at the start of the backtrace structure.
20608 22 mov FP, R7 Put this value into the frame pointer. */
20610 work_register = thumb_find_work_register (live_regs_mask);
20612 if (ARM_EABI_UNWIND_TABLES)
20613 asm_fprintf (f, "\t.pad #16\n");
20616 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20617 SP_REGNUM, SP_REGNUM);
20619 if (dwarf2out_do_frame ())
20621 char *l = dwarf2out_cfi_label (false);
20623 cfa_offset = cfa_offset + 16;
20624 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
20629 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
20630 offset = bit_count (l_mask) * UNITS_PER_WORD;
20635 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20636 offset + 16 + crtl->args.pretend_args_size);
20638 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20641 /* Make sure that the instruction fetching the PC is in the right place
20642 to calculate "start of backtrace creation code + 12". */
20645 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20646 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20648 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20649 ARM_HARD_FRAME_POINTER_REGNUM);
20650 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20655 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
20656 ARM_HARD_FRAME_POINTER_REGNUM);
20657 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20659 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
20660 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20664 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
20665 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
20667 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
20669 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20670 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
20672 /* Optimization: If we are not pushing any low registers but we are going
20673 to push some high registers then delay our first push. This will just
20674 be a push of LR and we can combine it with the push of the first high
20676 else if ((l_mask & 0xff) != 0
20677 || (high_regs_pushed == 0 && l_mask))
20679 unsigned long mask = l_mask;
20680 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
20681 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
20684 if (high_regs_pushed)
20686 unsigned pushable_regs;
20687 unsigned next_hi_reg;
20689 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
20690 if (live_regs_mask & (1 << next_hi_reg))
20693 pushable_regs = l_mask & 0xff;
20695 if (pushable_regs == 0)
20696 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
20698 while (high_regs_pushed > 0)
20700 unsigned long real_regs_mask = 0;
20702 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
20704 if (pushable_regs & (1 << regno))
20706 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
20708 high_regs_pushed --;
20709 real_regs_mask |= (1 << next_hi_reg);
20711 if (high_regs_pushed)
20713 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
20715 if (live_regs_mask & (1 << next_hi_reg))
20720 pushable_regs &= ~((1 << regno) - 1);
20726 /* If we had to find a work register and we have not yet
20727 saved the LR then add it to the list of regs to push. */
20728 if (l_mask == (1 << LR_REGNUM))
20730 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
20732 real_regs_mask | (1 << LR_REGNUM));
20736 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
20741 /* Handle the case of a double word load into a low register from
20742 a computed memory address. The computed address may involve a
20743 register which is overwritten by the load. */
20745 thumb_load_double_from_address (rtx *operands)
20753 gcc_assert (GET_CODE (operands[0]) == REG);
20754 gcc_assert (GET_CODE (operands[1]) == MEM);
20756 /* Get the memory address. */
20757 addr = XEXP (operands[1], 0);
20759 /* Work out how the memory address is computed. */
20760 switch (GET_CODE (addr))
20763 operands[2] = adjust_address (operands[1], SImode, 4);
20765 if (REGNO (operands[0]) == REGNO (addr))
20767 output_asm_insn ("ldr\t%H0, %2", operands);
20768 output_asm_insn ("ldr\t%0, %1", operands);
20772 output_asm_insn ("ldr\t%0, %1", operands);
20773 output_asm_insn ("ldr\t%H0, %2", operands);
20778 /* Compute <address> + 4 for the high order load. */
20779 operands[2] = adjust_address (operands[1], SImode, 4);
20781 output_asm_insn ("ldr\t%0, %1", operands);
20782 output_asm_insn ("ldr\t%H0, %2", operands);
20786 arg1 = XEXP (addr, 0);
20787 arg2 = XEXP (addr, 1);
20789 if (CONSTANT_P (arg1))
20790 base = arg2, offset = arg1;
20792 base = arg1, offset = arg2;
20794 gcc_assert (GET_CODE (base) == REG);
20796 /* Catch the case of <address> = <reg> + <reg> */
20797 if (GET_CODE (offset) == REG)
20799 int reg_offset = REGNO (offset);
20800 int reg_base = REGNO (base);
20801 int reg_dest = REGNO (operands[0]);
20803 /* Add the base and offset registers together into the
20804 higher destination register. */
20805 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
20806 reg_dest + 1, reg_base, reg_offset);
20808 /* Load the lower destination register from the address in
20809 the higher destination register. */
20810 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
20811 reg_dest, reg_dest + 1);
20813 /* Load the higher destination register from its own address
20815 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
20816 reg_dest + 1, reg_dest + 1);
20820 /* Compute <address> + 4 for the high order load. */
20821 operands[2] = adjust_address (operands[1], SImode, 4);
20823 /* If the computed address is held in the low order register
20824 then load the high order register first, otherwise always
20825 load the low order register first. */
20826 if (REGNO (operands[0]) == REGNO (base))
20828 output_asm_insn ("ldr\t%H0, %2", operands);
20829 output_asm_insn ("ldr\t%0, %1", operands);
20833 output_asm_insn ("ldr\t%0, %1", operands);
20834 output_asm_insn ("ldr\t%H0, %2", operands);
20840 /* With no registers to worry about we can just load the value
20842 operands[2] = adjust_address (operands[1], SImode, 4);
20844 output_asm_insn ("ldr\t%H0, %2", operands);
20845 output_asm_insn ("ldr\t%0, %1", operands);
20849 gcc_unreachable ();
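/* Example of the overlap case handled above (illustrative): loading a
   doubleword into r0/r1 from [r0, #8] must fetch the high word first,
   since loading the low word first would clobber the base register:

	ldr	r1, [r0, #12]
	ldr	r0, [r0, #8]  */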
20856 thumb_output_move_mem_multiple (int n, rtx *operands)
20863 if (REGNO (operands[4]) > REGNO (operands[5]))
20866 operands[4] = operands[5];
20869 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
20870 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
20874 if (REGNO (operands[4]) > REGNO (operands[5]))
20877 operands[4] = operands[5];
20880 if (REGNO (operands[5]) > REGNO (operands[6]))
20883 operands[5] = operands[6];
20886 if (REGNO (operands[4]) > REGNO (operands[5]))
20889 operands[4] = operands[5];
20893 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20894 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20898 gcc_unreachable ();
20904 /* Output a call-via instruction for thumb state. */
20906 thumb_call_via_reg (rtx reg)
20908 int regno = REGNO (reg);
20911 gcc_assert (regno < LR_REGNUM);
20913 /* If we are in the normal text section we can use a single instance
20914 per compilation unit. If we are doing function sections, then we need
20915 an entry per section, since we can't rely on reachability. */
20916 if (in_section == text_section)
20918 thumb_call_reg_needed = 1;
20920 if (thumb_call_via_label[regno] == NULL)
20921 thumb_call_via_label[regno] = gen_label_rtx ();
20922 labelp = thumb_call_via_label + regno;
20926 if (cfun->machine->call_via[regno] == NULL)
20927 cfun->machine->call_via[regno] = gen_label_rtx ();
20928 labelp = cfun->machine->call_via + regno;
20931 output_asm_insn ("bl\t%a0", labelp);
20935 /* Routines for generating rtl. */
20937 thumb_expand_movmemqi (rtx *operands)
20939 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20940 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20941 HOST_WIDE_INT len = INTVAL (operands[2]);
20942 HOST_WIDE_INT offset = 0;
20946 emit_insn (gen_movmem12b (out, in, out, in));
20952 emit_insn (gen_movmem8b (out, in, out, in));
20958 rtx reg = gen_reg_rtx (SImode);
20959 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20960 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20967 rtx reg = gen_reg_rtx (HImode);
20968 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20969 plus_constant (in, offset))));
20970 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20978 rtx reg = gen_reg_rtx (QImode);
20979 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20980 plus_constant (in, offset))));
20981 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20987 thumb_reload_out_hi (rtx *operands)
20989 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20992 /* Handle reading a half-word from memory during reload. */
20994 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20996 gcc_unreachable ();
20999 /* Return the length of a function name prefix
21000 that starts with the character 'c'. */
21002 arm_get_strip_length (int c)
21006 ARM_NAME_ENCODING_LENGTHS
21011 /* Return a pointer to a function's name with any
21012 and all prefix encodings stripped from it. */
21014 arm_strip_name_encoding (const char *name)
21018 while ((skip = arm_get_strip_length (* name)))
21024 /* If there is a '*' anywhere in the name's prefix, then
21025 emit the stripped name verbatim, otherwise prepend an
21026 underscore if leading underscores are being used. */
21028 arm_asm_output_labelref (FILE *stream, const char *name)
21033 while ((skip = arm_get_strip_length (* name)))
21035 verbatim |= (*name == '*');
21040 fputs (name, stream);
21042 asm_fprintf (stream, "%U%s", name);
21046 arm_file_start (void)
21050 if (TARGET_UNIFIED_ASM)
21051 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21055 const char *fpu_name;
21056 if (arm_selected_arch)
21057 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21059 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21061 if (TARGET_SOFT_FLOAT)
21064 fpu_name = "softvfp";
21066 fpu_name = "softfpa";
21070 fpu_name = arm_fpu_desc->name;
21071 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21073 if (TARGET_HARD_FLOAT)
21074 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21075 if (TARGET_HARD_FLOAT_ABI)
21076 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21079 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21081 /* Some of these attributes only apply when the corresponding features
21082 are used. However we don't have any easy way of figuring this out.
21083 Conservatively record the setting that would have been used. */
21085 /* Tag_ABI_FP_rounding. */
21086 if (flag_rounding_math)
21087 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21088 if (!flag_unsafe_math_optimizations)
21090 /* Tag_ABI_FP_denormal. */
21091 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21092 /* Tag_ABI_FP_exceptions. */
21093 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21095 /* Tag_ABI_FP_user_exceptions. */
21096 if (flag_signaling_nans)
21097 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21098 /* Tag_ABI_FP_number_model. */
21099 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21100 flag_finite_math_only ? 1 : 3);
21102 /* Tag_ABI_align8_needed. */
21103 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21104 /* Tag_ABI_align8_preserved. */
21105 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21106 /* Tag_ABI_enum_size. */
21107 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21108 flag_short_enums ? 1 : 2);
21110 /* Tag_ABI_optimization_goals. */
21113 else if (optimize >= 2)
21119 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21121 /* Tag_ABI_FP_16bit_format. */
21122 if (arm_fp16_format)
21123 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21124 (int)arm_fp16_format);
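  /* A typical attribute block produced by this function looks like
     (values illustrative only; they depend on the command line):

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal
	.eabi_attribute 21, 1	@ Tag_ABI_FP_exceptions
	.eabi_attribute 23, 3	@ Tag_ABI_FP_number_model (IEEE 754)
	.eabi_attribute 24, 1	@ Tag_ABI_align8_needed
	.eabi_attribute 25, 1	@ Tag_ABI_align8_preserved
	.eabi_attribute 26, 2	@ Tag_ABI_enum_size (32-bit enums)
	.eabi_attribute 30, 1	@ Tag_ABI_optimization_goals  */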
21126 if (arm_lang_output_object_attributes_hook)
21127 arm_lang_output_object_attributes_hook();
21129 default_file_start();
21133 arm_file_end (void)
21137 if (NEED_INDICATE_EXEC_STACK)
21138 /* Add .note.GNU-stack. */
21139 file_end_indicate_exec_stack ();
21141 if (! thumb_call_reg_needed)
21144 switch_to_section (text_section);
21145 asm_fprintf (asm_out_file, "\t.code 16\n");
21146 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21148 for (regno = 0; regno < LR_REGNUM; regno++)
21150 rtx label = thumb_call_via_label[regno];
21154 targetm.asm_out.internal_label (asm_out_file, "L",
21155 CODE_LABEL_NUMBER (label));
21156 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21162 /* Symbols in the text segment can be accessed without indirecting via the
21163 constant pool; it may take an extra binary operation, but this is still
21164 faster than indirecting via memory. Don't do this when not optimizing,
21165 since we won't be calculating all of the offsets necessary to do this simplification. */
21169 arm_encode_section_info (tree decl, rtx rtl, int first)
21171 if (optimize > 0 && TREE_CONSTANT (decl))
21172 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21174 default_encode_section_info (decl, rtl, first);
21176 #endif /* !ARM_PE */
21179 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21181 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21182 && !strcmp (prefix, "L"))
21184 arm_ccfsm_state = 0;
21185 arm_target_insn = NULL;
21187 default_internal_label (stream, prefix, labelno);
21190 /* Output code to add DELTA to the first argument, and then jump
21191 to FUNCTION. Used for C++ multiple inheritance. */
21193 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21194 HOST_WIDE_INT delta,
21195 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21198 static int thunk_label = 0;
21201 int mi_delta = delta;
21202 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21204 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21207 mi_delta = - mi_delta;
21211 int labelno = thunk_label++;
21212 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21213 /* Thunks are entered in ARM mode when available. */
21214 if (TARGET_THUMB1_ONLY)
21216 /* push r3 so we can use it as a temporary. */
21217 /* TODO: Omit this save if r3 is not used. */
21218 fputs ("\tpush {r3}\n", file);
21219 fputs ("\tldr\tr3, ", file);
21223 fputs ("\tldr\tr12, ", file);
21225 assemble_name (file, label);
21226 fputc ('\n', file);
21229 /* If we are generating PIC, the ldr instruction below loads
21230 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21231 the address of the add + 8, so we have:
21233 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21236 Note that we have "+ 1" because some versions of GNU ld
21237 don't set the low bit of the result for R_ARM_REL32
21238 relocations against thumb function symbols.
21239 On ARMv6M this is +4, not +8. */
21240 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21241 assemble_name (file, labelpc);
21242 fputs (":\n", file);
21243 if (TARGET_THUMB1_ONLY)
21245 /* This is 2 insns after the start of the thunk, so we know it
21246 is 4-byte aligned. */
21247 fputs ("\tadd\tr3, pc, r3\n", file);
21248 fputs ("\tmov r12, r3\n", file);
21251 fputs ("\tadd\tr12, pc, r12\n", file);
21253 else if (TARGET_THUMB1_ONLY)
21254 fputs ("\tmov r12, r3\n", file);
21256 if (TARGET_THUMB1_ONLY)
21258 if (mi_delta > 255)
21260 fputs ("\tldr\tr3, ", file);
21261 assemble_name (file, label);
21262 fputs ("+4\n", file);
21263 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21264 mi_op, this_regno, this_regno);
21266 else if (mi_delta != 0)
21268 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21269 mi_op, this_regno, this_regno,
21275 /* TODO: Use movw/movt for large constants when available. */
21276 while (mi_delta != 0)
21278 if ((mi_delta & (3 << shift)) == 0)
21282 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21283 mi_op, this_regno, this_regno,
21284 mi_delta & (0xff << shift));
21285 mi_delta &= ~(0xff << shift);
21292 if (TARGET_THUMB1_ONLY)
21293 fputs ("\tpop\t{r3}\n", file);
21295 fprintf (file, "\tbx\tr12\n");
21296 ASM_OUTPUT_ALIGN (file, 2);
21297 assemble_name (file, label);
21298 fputs (":\n", file);
21301 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21302 rtx tem = XEXP (DECL_RTL (function), 0);
21303 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21304 tem = gen_rtx_MINUS (GET_MODE (tem),
21306 gen_rtx_SYMBOL_REF (Pmode,
21307 ggc_strdup (labelpc)));
21308 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21311 /* Output ".word .LTHUNKn". */
21312 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21314 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21315 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21319 fputs ("\tb\t", file);
21320 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21321 if (NEED_PLT_RELOC)
21322 fputs ("(PLT)", file);
21323 fputc ('\n', file);
21328 arm_emit_vector_const (FILE *file, rtx x)
21331 const char * pattern;
21333 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21335 switch (GET_MODE (x))
21337 case V2SImode: pattern = "%08x"; break;
21338 case V4HImode: pattern = "%04x"; break;
21339 case V8QImode: pattern = "%02x"; break;
21340 default: gcc_unreachable ();
21343 fprintf (file, "0x");
21344 for (i = CONST_VECTOR_NUNITS (x); i--;)
21348 element = CONST_VECTOR_ELT (x, i);
21349 fprintf (file, pattern, INTVAL (element));
21355 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21356 HFmode constant pool entries are actually loaded with ldr. */
21358 arm_emit_fp16_const (rtx c)
21363 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21364 bits = real_to_target (NULL, &r, HFmode);
21365 if (WORDS_BIG_ENDIAN)
21366 assemble_zeros (2);
21367 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21368 if (!WORDS_BIG_ENDIAN)
21369 assemble_zeros (2);
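/* Example (illustrative): the HFmode constant 1.0 has the 16-bit image
   0x3c00, so on a little-endian target the code above emits

	.short	0x3c00
	.short	0

   padding the pool entry to the 4-byte word that ldr expects.  */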
21373 arm_output_load_gr (rtx *operands)
21380 if (GET_CODE (operands [1]) != MEM
21381 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21382 || GET_CODE (reg = XEXP (sum, 0)) != REG
21383 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21384 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21385 return "wldrw%?\t%0, %1";
21387 /* Fix up an out-of-range load of a GR register. */
21388 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21389 wcgr = operands[0];
21391 output_asm_insn ("ldr%?\t%0, %1", operands);
21393 operands[0] = wcgr;
21395 output_asm_insn ("tmcr%?\t%0, %1", operands);
21396 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21401 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21403 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21404 named arg and all anonymous args onto the stack.
21405 XXX I know the prologue shouldn't be pushing registers, but it is faster
21409 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21410 enum machine_mode mode,
21413 int second_time ATTRIBUTE_UNUSED)
21417 cfun->machine->uses_anonymous_args = 1;
21418 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21420 nregs = pcum->aapcs_ncrn;
21421 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21425 nregs = pcum->nregs;
21427 if (nregs < NUM_ARG_REGS)
21428 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
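/* Example (illustrative): for "int f (int a, ...)" one core register
   holds the named argument, so NREGS == 1 and *PRETEND_SIZE becomes
   3 * UNITS_PER_WORD == 12; the prologue then pushes r1-r3 so the
   anonymous arguments sit contiguously on the stack.  */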
21431 /* Return nonzero if the CONSUMER instruction (a store) does not need
21432 PRODUCER's value to calculate the address. */
21435 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21437 rtx value = PATTERN (producer);
21438 rtx addr = PATTERN (consumer);
21440 if (GET_CODE (value) == COND_EXEC)
21441 value = COND_EXEC_CODE (value);
21442 if (GET_CODE (value) == PARALLEL)
21443 value = XVECEXP (value, 0, 0);
21444 value = XEXP (value, 0);
21445 if (GET_CODE (addr) == COND_EXEC)
21446 addr = COND_EXEC_CODE (addr);
21447 if (GET_CODE (addr) == PARALLEL)
21448 addr = XVECEXP (addr, 0, 0);
21449 addr = XEXP (addr, 0);
21451 return !reg_overlap_mentioned_p (value, addr);
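/* Example (illustrative): if PRODUCER sets r1 and CONSUMER is
   "str r3, [r1, #4]", the address depends on r1 and we return 0; for
   "str r1, [r2]" only the stored value depends on r1, so we return 1.  */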
21454 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21455 have an early register shift value or amount dependency on the
21456 result of PRODUCER. */
21459 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21461 rtx value = PATTERN (producer);
21462 rtx op = PATTERN (consumer);
21465 if (GET_CODE (value) == COND_EXEC)
21466 value = COND_EXEC_CODE (value);
21467 if (GET_CODE (value) == PARALLEL)
21468 value = XVECEXP (value, 0, 0);
21469 value = XEXP (value, 0);
21470 if (GET_CODE (op) == COND_EXEC)
21471 op = COND_EXEC_CODE (op);
21472 if (GET_CODE (op) == PARALLEL)
21473 op = XVECEXP (op, 0, 0);
21476 early_op = XEXP (op, 0);
21477 /* This is either an actual independent shift, or a shift applied to
21478 the first operand of another operation. We want the whole shift
21480 if (GET_CODE (early_op) == REG)
21483 return !reg_overlap_mentioned_p (value, early_op);
21486 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21487 have an early register shift value dependency on the result of
21491 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21493 rtx value = PATTERN (producer);
21494 rtx op = PATTERN (consumer);
21497 if (GET_CODE (value) == COND_EXEC)
21498 value = COND_EXEC_CODE (value);
21499 if (GET_CODE (value) == PARALLEL)
21500 value = XVECEXP (value, 0, 0);
21501 value = XEXP (value, 0);
21502 if (GET_CODE (op) == COND_EXEC)
21503 op = COND_EXEC_CODE (op);
21504 if (GET_CODE (op) == PARALLEL)
21505 op = XVECEXP (op, 0, 0);
21508 early_op = XEXP (op, 0);
21510 /* This is either an actual independent shift, or a shift applied to
21511 the first operand of another operation. We want the value being
21512 shifted, in either case. */
21513 if (GET_CODE (early_op) != REG)
21514 early_op = XEXP (early_op, 0);
21516 return !reg_overlap_mentioned_p (value, early_op);
21519 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21520 have an early register mult dependency on the result of
21524 arm_no_early_mul_dep (rtx producer, rtx consumer)
21526 rtx value = PATTERN (producer);
21527 rtx op = PATTERN (consumer);
21529 if (GET_CODE (value) == COND_EXEC)
21530 value = COND_EXEC_CODE (value);
21531 if (GET_CODE (value) == PARALLEL)
21532 value = XVECEXP (value, 0, 0);
21533 value = XEXP (value, 0);
21534 if (GET_CODE (op) == COND_EXEC)
21535 op = COND_EXEC_CODE (op);
21536 if (GET_CODE (op) == PARALLEL)
21537 op = XVECEXP (op, 0, 0);
21540 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
21542 if (GET_CODE (XEXP (op, 0)) == MULT)
21543 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
21545 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
21551 /* We can't rely on the caller doing the proper promotion when
21552 using APCS or ATPCS. */
21555 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
21557 return !TARGET_AAPCS_BASED;
21560 static enum machine_mode
21561 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
21562 enum machine_mode mode,
21563 int *punsignedp ATTRIBUTE_UNUSED,
21564 const_tree fntype ATTRIBUTE_UNUSED,
21565 int for_return ATTRIBUTE_UNUSED)
21567 if (GET_MODE_CLASS (mode) == MODE_INT
21568 && GET_MODE_SIZE (mode) < 4)
21574 /* AAPCS based ABIs use short enums by default. */
21577 arm_default_short_enums (void)
21579 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
21583 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21586 arm_align_anon_bitfield (void)
21588 return TARGET_AAPCS_BASED;
21592 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21595 arm_cxx_guard_type (void)
21597 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
21600 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21601 has an accumulator dependency on the result of the producer (a
21602 multiplication instruction) and no other dependency on that result. */
21604 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
21606 rtx mul = PATTERN (producer);
21607 rtx mac = PATTERN (consumer);
21609 rtx mac_op0, mac_op1, mac_acc;
21611 if (GET_CODE (mul) == COND_EXEC)
21612 mul = COND_EXEC_CODE (mul);
21613 if (GET_CODE (mac) == COND_EXEC)
21614 mac = COND_EXEC_CODE (mac);
21616 /* Check that mul is of the form (set (...) (mult ...))
21617 and mla is of the form (set (...) (plus (mult ...) (...))). */
21618 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
21619 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
21620 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
21623 mul_result = XEXP (mul, 0);
21624 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
21625 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
21626 mac_acc = XEXP (XEXP (mac, 1), 1);
21628 return (reg_overlap_mentioned_p (mul_result, mac_acc)
21629 && !reg_overlap_mentioned_p (mul_result, mac_op0)
21630 && !reg_overlap_mentioned_p (mul_result, mac_op1));
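/* Example (illustrative): with PRODUCER "mul r0, r1, r2" and CONSUMER
   "mla r4, r5, r6, r0", the multiply result feeds only the accumulator
   operand, so this returns nonzero and cores with a multiply-accumulate
   forwarding path can issue the pair back-to-back.  */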
21634 /* The EABI says test the least significant bit of a guard variable. */
21637 arm_cxx_guard_mask_bit (void)
21639 return TARGET_AAPCS_BASED;
21643 /* The EABI specifies that all array cookies are 8 bytes long. */
21646 arm_get_cookie_size (tree type)
21650 if (!TARGET_AAPCS_BASED)
21651 return default_cxx_get_cookie_size (type);
21653 size = build_int_cst (sizetype, 8);
21658 /* The EABI says that array cookies should also contain the element size. */
21661 arm_cookie_has_size (void)
21663 return TARGET_AAPCS_BASED;
21667 /* The EABI says constructors and destructors should return a pointer to
21668 the object constructed/destroyed. */
21671 arm_cxx_cdtor_returns_this (void)
21673 return TARGET_AAPCS_BASED;
21676 /* The EABI says that an inline function may never be the key
21680 arm_cxx_key_method_may_be_inline (void)
21682 return !TARGET_AAPCS_BASED;
21686 arm_cxx_determine_class_data_visibility (tree decl)
21688 if (!TARGET_AAPCS_BASED
21689 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
21692 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21693 is exported. However, on systems without dynamic vague linkage,
21694 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21695 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
21696 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
21698 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
21699 DECL_VISIBILITY_SPECIFIED (decl) = 1;
21703 arm_cxx_class_data_always_comdat (void)
21705 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21706 vague linkage if the class has no key function. */
21707 return !TARGET_AAPCS_BASED;
21711 /* The EABI says __aeabi_atexit should be used to register static
21715 arm_cxx_use_aeabi_atexit (void)
21717 return TARGET_AAPCS_BASED;
21722 arm_set_return_address (rtx source, rtx scratch)
21724 arm_stack_offsets *offsets;
21725 HOST_WIDE_INT delta;
21727 unsigned long saved_regs;
21729 offsets = arm_get_frame_offsets ();
21730 saved_regs = offsets->saved_regs_mask;
21732 if ((saved_regs & (1 << LR_REGNUM)) == 0)
21733 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
21736 if (frame_pointer_needed)
21737 addr = plus_constant(hard_frame_pointer_rtx, -4);
21740 /* LR will be the first saved register. */
21741 delta = offsets->outgoing_args - (offsets->frame + 4);
21746 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
21747 GEN_INT (delta & ~4095)));
21752 addr = stack_pointer_rtx;
21754 addr = plus_constant (addr, delta);
21756 emit_move_insn (gen_frame_mem (Pmode, addr), source);

void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
        {
          delta = offsets->soft_frame - offsets->saved_args;
          reg = THUMB_HARD_FRAME_POINTER_REGNUM;
          if (TARGET_THUMB1)
            limit = 128;
        }
      else
        {
          delta = offsets->outgoing_args - offsets->saved_args;
          reg = SP_REGNUM;
        }
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
        delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
        {
          emit_insn (gen_movsi (scratch, GEN_INT (delta)));
          emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
          addr = scratch;
        }
      else
        addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  return false;
}
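
/* For example, V4SFmode (the mode of float32x4_t) is only accepted
   when NEON is enabled, while the 64-bit modes V8QImode, V4HImode and
   V2SImode are vector modes for both NEON and iWMMXt.  */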

/* Implements target hook small_register_classes_for_mode_p.  */
static bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
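
/* For example, an SImode shift by 257 behaves exactly like a shift by
   257 & 255 == 1, so the middle end may delete an explicit `count & 255'
   before such a shift; for DImode the mask of 0 promises nothing.  */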

/* Map internal gcc register numbers to DWARF2 register numbers.  */

unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
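
/* For example, s0 and s1 both fall in the legacy single-precision
   range (DWARF registers 64 and 65), while d16 -- which has no
   single-precision aliases -- maps into the VFPv3 range as 272.  */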

/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */

static rtx
arm_dwarf_register_span (rtx rtl)
{
  unsigned regno;
  int nregs;
  int i;
  rtx p;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;

  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
  regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);

  return p;
}
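
/* For example, a 16-byte value held in d4 yields nregs == 2 and a span
   of (parallel [(reg:DI 260) (reg:DI 261)]), i.e. the DWARF numbers
   for d4 and d5 from the 256-287 range described above.  */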

#ifdef TARGET_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */
static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
         never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
        {
          fprintf (asm_out_file, "\t.pad #4\n");
          offset -= 4;
        }
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
         Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
          || GET_CODE (XEXP (e, 0)) != MEM
          || GET_CODE (XEXP (e, 1)) != REG)
        abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
        abort ();
      lastreg = reg;

      if (i != 1)
        fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
         double precision register names.  */
      if (IS_VFP_REGNUM (reg))
        asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
        {
          offset += reg_size;
          if (GET_CODE (XEXP (e, 0)) != REG
              || REGNO (XEXP (e, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e, 1)) != CONST_INT
              || offset != INTVAL (XEXP (e, 1)))
            abort ();
        }
      else if (i != 1
               || GET_CODE (e) != REG
               || REGNO (e) != SP_REGNUM)
        abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
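
/* For example, a prologue `push {r4, r5, lr}' is annotated here as
   `.save {r4, r5, lr}' and a `vpush {d8, d9}' as `.vsave {d8, d9}';
   a push that also stores pc gets the extra `.pad #4' seen above.  */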

/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
          || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
          || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
        abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
        asm_fprintf(asm_out_file, "{d%d}\n",
                    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
        asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
        {
          /* A stack increment.  */
          if (GET_CODE (e1) != PLUS
              || GET_CODE (XEXP (e1, 0)) != REG
              || REGNO (XEXP (e1, 0)) != SP_REGNUM
              || GET_CODE (XEXP (e1, 1)) != CONST_INT)
            abort ();

          asm_fprintf (asm_out_file, "\t.pad #%wd\n",
                       -INTVAL (XEXP (e1, 1)));
        }
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT offset;

          if (GET_CODE (e1) == PLUS)
            {
              if (GET_CODE (XEXP (e1, 0)) != REG
                  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
                abort ();
              reg = REGNO (XEXP (e1, 0));
              offset = INTVAL (XEXP (e1, 1));
              asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
                           HARD_FRAME_POINTER_REGNUM, reg,
                           offset);
            }
          else if (GET_CODE (e1) == REG)
            {
              reg = REGNO (e1);
              asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
                           HARD_FRAME_POINTER_REGNUM, reg);
            }
          else
            abort ();
        }
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
        {
          /* Move from sp to reg.  */
          asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
        }
      else if (GET_CODE (e1) == PLUS
               && GET_CODE (XEXP (e1, 0)) == REG
               && REGNO (XEXP (e1, 0)) == SP_REGNUM
               && GET_CODE (XEXP (e1, 1)) == CONST_INT)
        {
          /* Set reg to offset from sp.  */
          asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
                       REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
        }
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
        {
          /* Stack pointer save before alignment.  */
          reg = REGNO (e0);
          asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
                       reg + 0x90, reg);
        }
      else
        abort ();
      break;

    default:
      abort ();
    }
}
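
/* For example, `str r4, [sp, #-4]!' is annotated `.save {r4}',
   `sub sp, sp, #16' becomes `.pad #16', and setting up a Thumb frame
   pointer with `add r7, sp, #8' becomes `.setfp r7, sp, #8'.  */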

/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
          || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}

/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
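
/* For example, a catch clause for a class Foo makes this emit
   `.word _ZTI3Foo(TARGET2)'; the meaning of the TARGET2 relocation is
   platform-defined (GOT-relative on GNU/Linux, for instance).  */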

#endif /* TARGET_UNWIND_INFO */

/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
         put anything on the stack, so hopefully it won't matter.
         CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
                              SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}

/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
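
/* A nothrow function therefore comes out bracketed as

       .fnstart
       ...
       .cantunwind
       .fnend

   telling the runtime unwinder to stop rather than unwind through it.  */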

static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputs (")", fp);
      break;
    default:
      break;
    }

  return TRUE;
}
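
/* For example, a general-dynamic reference to a symbol `x' prints as
   something like `x(tlsgd) + (. - .LPIC0 - 8)' (label and constant
   illustrative), the trailing term being the PC-relative adjustment
   consumed by the R_ARM_TLS_GD32 relocation.  */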

/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}

bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      if (GOT_PCREL)
        fputs ("+.", fp);
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}

/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
        {
          if (val != -1)
            operands[2] = GEN_INT (val);
          sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
        }
      else
        sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
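
/* For example, a left shift by two with SET_FLAGS == 1 under unified
   asm produces the template "lsl%.\t%0, %1, %2", which is emitted as
   something like `lsls r0, r1, #2' once the modifier is expanded.  */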

/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");

    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
              "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");

    case SImode:
      return "bl\t%___gnu_thumb1_case_si";

    default:
      gcc_unreachable ();
    }
}
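
/* The __gnu_thumb1_case_* helpers are supplied by lib1funcs.asm; the
   dispatch table itself is placed immediately after the `bl', and the
   helper uses the return address to locate and index it.  */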

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";

    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";

    case SImode:
      if (flag_pic)
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
          output_asm_insn ("add\t%4, %4, %5", operands);
          return "bx\t%4";
        }
      else
        {
          output_asm_insn ("adr\t%4, %l2", operands);
          return "ldr\t%|pc, [%4, %0, lsl #2]";
        }

    default:
      gcc_unreachable ();
    }
}
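
/* So for a byte-offset table the complete dispatch comes out as, e.g.:

       cmp   r0, r1
       bhi   .Ldefault
       tbb   [pc, r0]

   with the table of offsets placed immediately after the tbb.  */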

/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */

static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}

/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,   "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,   "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,   "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,   "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,   "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,   "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,   "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,   "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,   "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode,  "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode,  "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,   "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,   "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,   "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,   "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,   "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode,  "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,   "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};

const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
        {
          warned = true;
          inform (input_location,
                  "the mangling of %<va_list%> has changed in GCC 4.4");
        }
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
          && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
                      pos->element_type_name))
        return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
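
/* For example, int8x8_t is V8QImode with element type __builtin_neon_qi,
   so a function `void f (int8x8_t)' mangles as _Z1f15__simd64_int8_t
   using the table above.  */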

/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}

/* Set default optimization options.  */

void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */

static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

#include "gt-arm.h"