1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
65 void (*arm_lang_output_object_attributes_hook)(void);
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets *arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
73 HOST_WIDE_INT, rtx, rtx, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx, int);
76 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
77 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
78 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
79 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
80 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
81 inline static int thumb1_index_register_rtx_p (rtx, int);
82 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
86 static rtx emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
130 static int arm_comp_type_attributes (const_tree, const_tree);
131 static void arm_set_default_type_attributes (tree);
132 static int arm_adjust_cost (rtx, rtx, rtx, int);
133 static int count_insns_for_constant (HOST_WIDE_INT, int);
134 static int arm_get_strip_length (int);
135 static bool arm_function_ok_for_sibcall (tree, tree);
136 static enum machine_mode arm_promote_function_mode (const_tree,
137 enum machine_mode, int *,
139 static bool arm_return_in_memory (const_tree, const_tree);
140 static rtx arm_function_value (const_tree, const_tree, bool);
141 static rtx arm_libcall_value (enum machine_mode, const_rtx);
143 static void arm_internal_label (FILE *, const char *, unsigned long);
144 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
146 static bool arm_have_conditional_execution (void);
147 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
148 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
149 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
150 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
151 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
153 static bool arm_rtx_costs (rtx, int, int, int *, bool);
154 static int arm_address_cost (rtx, bool);
155 static bool arm_memory_load_p (rtx);
156 static bool arm_cirrus_insn_p (rtx);
157 static void cirrus_reorg (rtx);
158 static void arm_init_builtins (void);
159 static void arm_init_iwmmxt_builtins (void);
160 static rtx safe_vector_operand (rtx, enum machine_mode);
161 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
162 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
163 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
164 static void emit_constant_insn (rtx cond, rtx pattern);
165 static rtx emit_set_insn (rtx, rtx);
166 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
168 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
170 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
172 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
173 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
175 static int aapcs_select_return_coproc (const_tree, const_tree);
177 #ifdef OBJECT_FORMAT_ELF
178 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
179 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
182 static void arm_encode_section_info (tree, rtx, int);
185 static void arm_file_end (void);
186 static void arm_file_start (void);
188 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
190 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
191 enum machine_mode, const_tree, bool);
192 static bool arm_promote_prototypes (const_tree);
193 static bool arm_default_short_enums (void);
194 static bool arm_align_anon_bitfield (void);
195 static bool arm_return_in_msb (const_tree);
196 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
197 static bool arm_return_in_memory (const_tree, const_tree);
199 static void arm_unwind_emit (FILE *, rtx);
200 static bool arm_output_ttype (rtx);
201 static void arm_asm_emit_except_personality (rtx);
202 static void arm_asm_init_sections (void);
204 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
205 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
206 static rtx arm_dwarf_register_span (rtx);
208 static tree arm_cxx_guard_type (void);
209 static bool arm_cxx_guard_mask_bit (void);
210 static tree arm_get_cookie_size (tree);
211 static bool arm_cookie_has_size (void);
212 static bool arm_cxx_cdtor_returns_this (void);
213 static bool arm_cxx_key_method_may_be_inline (void);
214 static void arm_cxx_determine_class_data_visibility (tree);
215 static bool arm_cxx_class_data_always_comdat (void);
216 static bool arm_cxx_use_aeabi_atexit (void);
217 static void arm_init_libfuncs (void);
218 static tree arm_build_builtin_va_list (void);
219 static void arm_expand_builtin_va_start (tree, rtx);
220 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
221 static void arm_option_override (void);
222 static bool arm_handle_option (struct gcc_options *, struct gcc_options *,
223 const struct cl_decoded_option *, location_t);
224 static void arm_target_help (void);
225 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
226 static bool arm_cannot_copy_insn_p (rtx);
227 static bool arm_tls_symbol_p (rtx x);
228 static int arm_issue_rate (void);
229 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
230 static bool arm_output_addr_const_extra (FILE *, rtx);
231 static bool arm_allocate_stack_slots_for_args (void);
232 static const char *arm_invalid_parameter_type (const_tree t);
233 static const char *arm_invalid_return_type (const_tree t);
234 static tree arm_promoted_type (const_tree t);
235 static tree arm_convert_to_type (tree type, tree expr);
236 static bool arm_scalar_mode_supported_p (enum machine_mode);
237 static bool arm_frame_pointer_required (void);
238 static bool arm_can_eliminate (const int, const int);
239 static void arm_asm_trampoline_template (FILE *);
240 static void arm_trampoline_init (rtx, tree, rtx);
241 static rtx arm_trampoline_adjust_address (rtx);
242 static rtx arm_pic_static_addr (rtx orig, rtx reg);
243 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
245 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
246 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
247 static bool arm_class_likely_spilled_p (reg_class_t);
248 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
249 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
253 static void arm_conditional_register_usage (void);
254 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
255 static unsigned int arm_autovectorize_vector_sizes (void);
258 /* Table of machine attributes. */
259 static const struct attribute_spec arm_attribute_table[] =
261 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
262 affects_type_identity } */
263 /* Function calls made to this symbol must be done indirectly, because
264 it may lie outside of the 26 bit addressing range of a normal function
266 { "long_call", 0, 0, false, true, true, NULL, false },
267 /* Whereas these functions are always known to reside within the 26 bit
269 { "short_call", 0, 0, false, true, true, NULL, false },
270 /* Specify the procedure call conventions for a function. */
271 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
273 /* Interrupt Service Routines have special prologue and epilogue requirements. */
274 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
276 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
278 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
281 /* ARM/PE has three new attributes:
283 dllexport - for exporting a function/variable that will live in a dll
284 dllimport - for importing a function/variable from a dll
286 Microsoft allows multiple declspecs in one __declspec, separating
287 them with spaces. We do NOT support this. Instead, use __declspec
290 { "dllimport", 0, 0, true, false, false, NULL, false },
291 { "dllexport", 0, 0, true, false, false, NULL, false },
292 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
294 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
295 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
296 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
297 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
300 { NULL, 0, 0, false, false, false, NULL, false }
303 /* Set default optimization options. */
304 static const struct default_options arm_option_optimization_table[] =
306 /* Enable section anchors by default at -O1 or higher. */
307 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
308 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
309 { OPT_LEVELS_NONE, 0, NULL, 0 }
312 /* Initialize the GCC target structure. */
313 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
314 #undef TARGET_MERGE_DECL_ATTRIBUTES
315 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
318 #undef TARGET_LEGITIMIZE_ADDRESS
319 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
321 #undef TARGET_ATTRIBUTE_TABLE
322 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
324 #undef TARGET_ASM_FILE_START
325 #define TARGET_ASM_FILE_START arm_file_start
326 #undef TARGET_ASM_FILE_END
327 #define TARGET_ASM_FILE_END arm_file_end
329 #undef TARGET_ASM_ALIGNED_SI_OP
330 #define TARGET_ASM_ALIGNED_SI_OP NULL
331 #undef TARGET_ASM_INTEGER
332 #define TARGET_ASM_INTEGER arm_assemble_integer
334 #undef TARGET_PRINT_OPERAND
335 #define TARGET_PRINT_OPERAND arm_print_operand
336 #undef TARGET_PRINT_OPERAND_ADDRESS
337 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
338 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
339 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
341 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
342 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
344 #undef TARGET_ASM_FUNCTION_PROLOGUE
345 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
347 #undef TARGET_ASM_FUNCTION_EPILOGUE
348 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
350 #undef TARGET_DEFAULT_TARGET_FLAGS
351 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
352 #undef TARGET_HANDLE_OPTION
353 #define TARGET_HANDLE_OPTION arm_handle_option
355 #define TARGET_HELP arm_target_help
356 #undef TARGET_OPTION_OVERRIDE
357 #define TARGET_OPTION_OVERRIDE arm_option_override
358 #undef TARGET_OPTION_OPTIMIZATION_TABLE
359 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
361 #undef TARGET_COMP_TYPE_ATTRIBUTES
362 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
364 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
365 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
367 #undef TARGET_SCHED_ADJUST_COST
368 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
370 #undef TARGET_ENCODE_SECTION_INFO
372 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
374 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
377 #undef TARGET_STRIP_NAME_ENCODING
378 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
380 #undef TARGET_ASM_INTERNAL_LABEL
381 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
383 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
384 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
386 #undef TARGET_FUNCTION_VALUE
387 #define TARGET_FUNCTION_VALUE arm_function_value
389 #undef TARGET_LIBCALL_VALUE
390 #define TARGET_LIBCALL_VALUE arm_libcall_value
392 #undef TARGET_ASM_OUTPUT_MI_THUNK
393 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
394 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
395 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS arm_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST arm_address_cost
402 #undef TARGET_SHIFT_TRUNCATION_MASK
403 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
404 #undef TARGET_VECTOR_MODE_SUPPORTED_P
405 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
406 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
407 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
408 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
409 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
410 arm_autovectorize_vector_sizes
412 #undef TARGET_MACHINE_DEPENDENT_REORG
413 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
415 #undef TARGET_INIT_BUILTINS
416 #define TARGET_INIT_BUILTINS arm_init_builtins
417 #undef TARGET_EXPAND_BUILTIN
418 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
420 #undef TARGET_INIT_LIBFUNCS
421 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
423 #undef TARGET_PROMOTE_FUNCTION_MODE
424 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
425 #undef TARGET_PROMOTE_PROTOTYPES
426 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
427 #undef TARGET_PASS_BY_REFERENCE
428 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
429 #undef TARGET_ARG_PARTIAL_BYTES
430 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
431 #undef TARGET_FUNCTION_ARG
432 #define TARGET_FUNCTION_ARG arm_function_arg
433 #undef TARGET_FUNCTION_ARG_ADVANCE
434 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
435 #undef TARGET_FUNCTION_ARG_BOUNDARY
436 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
438 #undef TARGET_SETUP_INCOMING_VARARGS
439 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
441 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
442 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
444 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
445 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
446 #undef TARGET_TRAMPOLINE_INIT
447 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
448 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
449 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
451 #undef TARGET_DEFAULT_SHORT_ENUMS
452 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
454 #undef TARGET_ALIGN_ANON_BITFIELD
455 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
457 #undef TARGET_NARROW_VOLATILE_BITFIELD
458 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
460 #undef TARGET_CXX_GUARD_TYPE
461 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
463 #undef TARGET_CXX_GUARD_MASK_BIT
464 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
466 #undef TARGET_CXX_GET_COOKIE_SIZE
467 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
469 #undef TARGET_CXX_COOKIE_HAS_SIZE
470 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
472 #undef TARGET_CXX_CDTOR_RETURNS_THIS
473 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
475 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
476 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
478 #undef TARGET_CXX_USE_AEABI_ATEXIT
479 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
481 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
482 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
483 arm_cxx_determine_class_data_visibility
485 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
486 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
488 #undef TARGET_RETURN_IN_MSB
489 #define TARGET_RETURN_IN_MSB arm_return_in_msb
491 #undef TARGET_RETURN_IN_MEMORY
492 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
494 #undef TARGET_MUST_PASS_IN_STACK
495 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
498 #undef TARGET_ASM_UNWIND_EMIT
499 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
501 /* EABI unwinding tables use a different format for the typeinfo tables. */
502 #undef TARGET_ASM_TTYPE
503 #define TARGET_ASM_TTYPE arm_output_ttype
505 #undef TARGET_ARM_EABI_UNWINDER
506 #define TARGET_ARM_EABI_UNWINDER true
508 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
509 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
511 #undef TARGET_ASM_INIT_SECTIONS
512 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
513 #endif /* ARM_UNWIND_INFO */
515 #undef TARGET_EXCEPT_UNWIND_INFO
516 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
518 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
519 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
521 #undef TARGET_DWARF_REGISTER_SPAN
522 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
524 #undef TARGET_CANNOT_COPY_INSN_P
525 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
528 #undef TARGET_HAVE_TLS
529 #define TARGET_HAVE_TLS true
532 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
533 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
535 #undef TARGET_CANNOT_FORCE_CONST_MEM
536 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
538 #undef TARGET_MAX_ANCHOR_OFFSET
539 #define TARGET_MAX_ANCHOR_OFFSET 4095
541 /* The minimum is set such that the total size of the block
542 for a particular anchor is -4088 + 1 + 4095 bytes, which is
543 divisible by eight, ensuring natural spacing of anchors. */
544 #undef TARGET_MIN_ANCHOR_OFFSET
545 #define TARGET_MIN_ANCHOR_OFFSET -4088
547 #undef TARGET_SCHED_ISSUE_RATE
548 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
550 #undef TARGET_MANGLE_TYPE
551 #define TARGET_MANGLE_TYPE arm_mangle_type
553 #undef TARGET_BUILD_BUILTIN_VA_LIST
554 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
555 #undef TARGET_EXPAND_BUILTIN_VA_START
556 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
557 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
558 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
561 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
562 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
565 #undef TARGET_LEGITIMATE_ADDRESS_P
566 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
568 #undef TARGET_INVALID_PARAMETER_TYPE
569 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
571 #undef TARGET_INVALID_RETURN_TYPE
572 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
574 #undef TARGET_PROMOTED_TYPE
575 #define TARGET_PROMOTED_TYPE arm_promoted_type
577 #undef TARGET_CONVERT_TO_TYPE
578 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
580 #undef TARGET_SCALAR_MODE_SUPPORTED_P
581 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
583 #undef TARGET_FRAME_POINTER_REQUIRED
584 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
586 #undef TARGET_CAN_ELIMINATE
587 #define TARGET_CAN_ELIMINATE arm_can_eliminate
589 #undef TARGET_CONDITIONAL_REGISTER_USAGE
590 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
592 #undef TARGET_CLASS_LIKELY_SPILLED_P
593 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
595 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
596 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
597 arm_vector_alignment_reachable
599 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
600 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
601 arm_builtin_support_vector_misalignment
603 #undef TARGET_PREFERRED_RENAME_CLASS
604 #define TARGET_PREFERRED_RENAME_CLASS \
605 arm_preferred_rename_class
607 struct gcc_target targetm = TARGET_INITIALIZER;
609 /* Obstack for minipool constant handling. */
610 static struct obstack minipool_obstack;
611 static char * minipool_startobj;
613 /* The maximum number of insns skipped which
614 will be conditionalised if possible. */
615 static int max_insns_skipped = 5;
617 extern FILE * asm_out_file;
619 /* True if we are currently building a constant table. */
620 int making_const_table;
622 /* The processor for which instructions should be scheduled. */
623 enum processor_type arm_tune = arm_none;
625 /* The current tuning set. */
626 const struct tune_params *current_tune;
628 /* Which floating point hardware to schedule for. */
631 /* Which floating popint hardware to use. */
632 const struct arm_fpu_desc *arm_fpu_desc;
634 /* Whether to use floating point hardware. */
635 enum float_abi_type arm_float_abi;
637 /* Which __fp16 format to use. */
638 enum arm_fp16_format_type arm_fp16_format;
640 /* Which ABI to use. */
641 enum arm_abi_type arm_abi;
643 /* Which thread pointer model to use. */
644 enum arm_tp_type target_thread_pointer = TP_AUTO;
646 /* Used to parse -mstructure_size_boundary command line option. */
647 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
649 /* Used for Thumb call_via trampolines. */
650 rtx thumb_call_via_label[14];
651 static int thumb_call_reg_needed;
653 /* Bit values used to identify processor capabilities. */
654 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
655 #define FL_ARCH3M (1 << 1) /* Extended multiply */
656 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
657 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
658 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
659 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
660 #define FL_THUMB (1 << 6) /* Thumb aware */
661 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
662 #define FL_STRONG (1 << 8) /* StrongARM */
663 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
664 #define FL_XSCALE (1 << 10) /* XScale */
665 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
666 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
667 media instructions. */
668 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
669 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
670 Note: ARM6 & 7 derivatives only. */
671 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
672 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
673 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
675 #define FL_DIV (1 << 18) /* Hardware divide. */
676 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
677 #define FL_NEON (1 << 20) /* Neon instructions. */
678 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
680 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
682 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
684 /* Flags that only effect tuning, not available instructions. */
685 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
688 #define FL_FOR_ARCH2 FL_NOTM
689 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
690 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
691 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
692 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
693 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
694 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
695 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
696 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
697 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
698 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
699 #define FL_FOR_ARCH6J FL_FOR_ARCH6
700 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
701 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
702 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
703 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
704 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
705 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
706 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
707 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
708 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
709 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
711 /* The bits in this mask specify which
712 instructions we are allowed to generate. */
713 static unsigned long insn_flags = 0;
715 /* The bits in this mask specify which instruction scheduling options should
717 static unsigned long tune_flags = 0;
719 /* The following are used in the arm.md file as equivalents to bits
720 in the above two flag variables. */
722 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
725 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
728 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
731 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
734 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
737 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
740 /* Nonzero if this chip supports the ARM 6K extensions. */
743 /* Nonzero if this chip supports the ARM 7 extensions. */
746 /* Nonzero if instructions not present in the 'M' profile can be used. */
747 int arm_arch_notm = 0;
749 /* Nonzero if instructions present in ARMv7E-M can be used. */
752 /* Nonzero if this chip can benefit from load scheduling. */
753 int arm_ld_sched = 0;
755 /* Nonzero if this chip is a StrongARM. */
756 int arm_tune_strongarm = 0;
758 /* Nonzero if this chip is a Cirrus variant. */
759 int arm_arch_cirrus = 0;
761 /* Nonzero if this chip supports Intel Wireless MMX technology. */
762 int arm_arch_iwmmxt = 0;
764 /* Nonzero if this chip is an XScale. */
765 int arm_arch_xscale = 0;
767 /* Nonzero if tuning for XScale */
768 int arm_tune_xscale = 0;
770 /* Nonzero if we want to tune for stores that access the write-buffer.
771 This typically means an ARM6 or ARM7 with MMU or MPU. */
772 int arm_tune_wbuf = 0;
774 /* Nonzero if tuning for Cortex-A9. */
775 int arm_tune_cortex_a9 = 0;
777 /* Nonzero if generating Thumb instructions. */
780 /* Nonzero if generating Thumb-1 instructions. */
783 /* Nonzero if we should define __THUMB_INTERWORK__ in the
785 XXX This is a bit of a hack, it's intended to help work around
786 problems in GLD which doesn't understand that armv5t code is
787 interworking clean. */
788 int arm_cpp_interwork = 0;
790 /* Nonzero if chip supports Thumb 2. */
793 /* Nonzero if chip supports integer division instruction. */
796 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
797 we must report the mode of the memory reference from
798 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
799 enum machine_mode output_memory_reference_mode;
801 /* The register number to be used for the PIC offset register. */
802 unsigned arm_pic_register = INVALID_REGNUM;
804 /* Set to 1 after arm_reorg has started. Reset to start at the start of
805 the next function. */
806 static int after_arm_reorg = 0;
808 enum arm_pcs arm_pcs_default;
810 /* For an explanation of these variables, see final_prescan_insn below. */
812 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
813 enum arm_cond_code arm_current_cc;
816 int arm_target_label;
817 /* The number of conditionally executed insns, including the current insn. */
818 int arm_condexec_count = 0;
819 /* A bitmask specifying the patterns for the IT block.
820 Zero means do not output an IT block before this insn. */
821 int arm_condexec_mask = 0;
822 /* The number of bits used in arm_condexec_mask. */
823 int arm_condexec_masklen = 0;
825 /* The condition codes of the ARM, and the inverse function. */
826 static const char * const arm_condition_codes[] =
828 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
829 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
832 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
833 int arm_regs_in_sequence[] =
835 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
838 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
839 #define streq(string1, string2) (strcmp (string1, string2) == 0)
841 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
842 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
843 | (1 << PIC_OFFSET_TABLE_REGNUM)))
845 /* Initialization code. */
849 const char *const name;
850 enum processor_type core;
852 const unsigned long flags;
853 const struct tune_params *const tune;
857 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
858 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
863 const struct tune_params arm_slowmul_tune =
865 arm_slowmul_rtx_costs,
868 ARM_PREFETCH_NOT_BENEFICIAL
871 const struct tune_params arm_fastmul_tune =
873 arm_fastmul_rtx_costs,
876 ARM_PREFETCH_NOT_BENEFICIAL
879 const struct tune_params arm_xscale_tune =
881 arm_xscale_rtx_costs,
882 xscale_sched_adjust_cost,
884 ARM_PREFETCH_NOT_BENEFICIAL
887 const struct tune_params arm_9e_tune =
892 ARM_PREFETCH_NOT_BENEFICIAL
895 const struct tune_params arm_cortex_a9_tune =
898 cortex_a9_sched_adjust_cost,
900 ARM_PREFETCH_BENEFICIAL(4,32,32)
903 const struct tune_params arm_fa726te_tune =
906 fa726te_sched_adjust_cost,
908 ARM_PREFETCH_NOT_BENEFICIAL
912 /* Not all of these give usefully different compilation alternatives,
913 but there is no simple way of generalizing them. */
914 static const struct processors all_cores[] =
917 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
918 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
919 #include "arm-cores.def"
921 {NULL, arm_none, NULL, 0, NULL}
924 static const struct processors all_architectures[] =
926 /* ARM Architectures */
927 /* We don't specify tuning costs here as it will be figured out
930 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
931 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
932 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
933 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
934 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
935 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
936 implementations that support it, so we will leave it out for now. */
937 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
938 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
939 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
940 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
941 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
942 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
943 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
944 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
945 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
946 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
947 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
948 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
949 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
950 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
951 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
952 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
953 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
954 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
955 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
956 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
957 {NULL, arm_none, NULL, 0 , NULL}
961 /* These are populated as commandline arguments are processed, or NULL
963 static const struct processors *arm_selected_arch;
964 static const struct processors *arm_selected_cpu;
965 static const struct processors *arm_selected_tune;
967 /* The name of the preprocessor macro to define for this architecture. */
969 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
971 /* Available values for -mfpu=. */
973 static const struct arm_fpu_desc all_fpus[] =
975 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
976 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
977 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
978 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
979 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
980 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
981 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
982 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
983 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
984 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
985 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
986 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
987 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
988 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
989 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
990 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
991 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
992 /* Compatibility aliases. */
993 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
1000 enum float_abi_type abi_type;
1004 /* Available values for -mfloat-abi=. */
1006 static const struct float_abi all_float_abis[] =
1008 {"soft", ARM_FLOAT_ABI_SOFT},
1009 {"softfp", ARM_FLOAT_ABI_SOFTFP},
1010 {"hard", ARM_FLOAT_ABI_HARD}
1017 enum arm_fp16_format_type fp16_format_type;
1021 /* Available values for -mfp16-format=. */
1023 static const struct fp16_format all_fp16_formats[] =
1025 {"none", ARM_FP16_FORMAT_NONE},
1026 {"ieee", ARM_FP16_FORMAT_IEEE},
1027 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
1034 enum arm_abi_type abi_type;
1038 /* Available values for -mabi=. */
1040 static const struct abi_name arm_all_abis[] =
1042 {"apcs-gnu", ARM_ABI_APCS},
1043 {"atpcs", ARM_ABI_ATPCS},
1044 {"aapcs", ARM_ABI_AAPCS},
1045 {"iwmmxt", ARM_ABI_IWMMXT},
1046 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
1049 /* Supported TLS relocations. */
1059 /* The maximum number of insns to be used when loading a constant. */
1061 arm_constant_limit (bool size_p)
1063 return size_p ? 1 : current_tune->constant_limit;
1066 /* Emit an insn that's a simple single-set. Both the operands must be known
1069 emit_set_insn (rtx x, rtx y)
1071 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1074 /* Return the number of bits set in VALUE. */
1076 bit_count (unsigned long value)
1078 unsigned long count = 0;
1083 value &= value - 1; /* Clear the least-significant set bit. */
1089 /* Set up library functions unique to ARM. */
1092 arm_init_libfuncs (void)
1094 /* There are no special library functions unless we are using the
1099 /* The functions below are described in Section 4 of the "Run-Time
1100 ABI for the ARM architecture", Version 1.0. */
1102 /* Double-precision floating-point arithmetic. Table 2. */
1103 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1104 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1105 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1106 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1107 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1109 /* Double-precision comparisons. Table 3. */
1110 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1111 set_optab_libfunc (ne_optab, DFmode, NULL);
1112 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1113 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1114 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1115 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1116 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1118 /* Single-precision floating-point arithmetic. Table 4. */
1119 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1120 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1121 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1122 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1123 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1125 /* Single-precision comparisons. Table 5. */
1126 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1127 set_optab_libfunc (ne_optab, SFmode, NULL);
1128 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1129 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1130 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1131 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1132 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1134 /* Floating-point to integer conversions. Table 6. */
1135 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1136 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1137 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1138 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1139 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1140 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1141 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1142 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1144 /* Conversions between floating types. Table 7. */
1145 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1146 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1148 /* Integer to floating-point conversions. Table 8. */
1149 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1150 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1151 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1152 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1153 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1154 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1155 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1156 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1158 /* Long long. Table 9. */
1159 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1160 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1161 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1162 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1163 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1164 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1165 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1166 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1168 /* Integer (32/32->32) division. \S 4.3.1. */
1169 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1170 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1172 /* The divmod functions are designed so that they can be used for
1173 plain division, even though they return both the quotient and the
1174 remainder. The quotient is returned in the usual location (i.e.,
1175 r0 for SImode, {r0, r1} for DImode), just as would be expected
1176 for an ordinary division routine. Because the AAPCS calling
1177 conventions specify that all of { r0, r1, r2, r3 } are
1178 callee-saved registers, there is no need to tell the compiler
1179 explicitly that those registers are clobbered by these
1181 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1182 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1184 /* For SImode division the ABI provides div-without-mod routines,
1185 which are faster. */
1186 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1187 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1189 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1190 divmod libcalls instead. */
1191 set_optab_libfunc (smod_optab, DImode, NULL);
1192 set_optab_libfunc (umod_optab, DImode, NULL);
1193 set_optab_libfunc (smod_optab, SImode, NULL);
1194 set_optab_libfunc (umod_optab, SImode, NULL);
1196 /* Half-precision float operations. The compiler handles all operations
1197 with NULL libfuncs by converting the SFmode. */
1198 switch (arm_fp16_format)
1200 case ARM_FP16_FORMAT_IEEE:
1201 case ARM_FP16_FORMAT_ALTERNATIVE:
1204 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1205 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1207 : "__gnu_f2h_alternative"));
1208 set_conv_libfunc (sext_optab, SFmode, HFmode,
1209 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1211 : "__gnu_h2f_alternative"));
1214 set_optab_libfunc (add_optab, HFmode, NULL);
1215 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1216 set_optab_libfunc (smul_optab, HFmode, NULL);
1217 set_optab_libfunc (neg_optab, HFmode, NULL);
1218 set_optab_libfunc (sub_optab, HFmode, NULL);
1221 set_optab_libfunc (eq_optab, HFmode, NULL);
1222 set_optab_libfunc (ne_optab, HFmode, NULL);
1223 set_optab_libfunc (lt_optab, HFmode, NULL);
1224 set_optab_libfunc (le_optab, HFmode, NULL);
1225 set_optab_libfunc (ge_optab, HFmode, NULL);
1226 set_optab_libfunc (gt_optab, HFmode, NULL);
1227 set_optab_libfunc (unord_optab, HFmode, NULL);
1234 if (TARGET_AAPCS_BASED)
1235 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1238 /* On AAPCS systems, this is the "struct __va_list". */
1239 static GTY(()) tree va_list_type;
1241 /* Return the type to use as __builtin_va_list. */
1243 arm_build_builtin_va_list (void)
1248 if (!TARGET_AAPCS_BASED)
1249 return std_build_builtin_va_list ();
1251 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1259 The C Library ABI further reinforces this definition in \S
1262 We must follow this definition exactly. The structure tag
1263 name is visible in C++ mangled names, and thus forms a part
1264 of the ABI. The field name may be used by people who
1265 #include <stdarg.h>. */
1266 /* Create the type. */
1267 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1268 /* Give it the required name. */
1269 va_list_name = build_decl (BUILTINS_LOCATION,
1271 get_identifier ("__va_list"),
1273 DECL_ARTIFICIAL (va_list_name) = 1;
1274 TYPE_NAME (va_list_type) = va_list_name;
1275 TYPE_STUB_DECL (va_list_type) = va_list_name;
1276 /* Create the __ap field. */
1277 ap_field = build_decl (BUILTINS_LOCATION,
1279 get_identifier ("__ap"),
1281 DECL_ARTIFICIAL (ap_field) = 1;
1282 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1283 TYPE_FIELDS (va_list_type) = ap_field;
1284 /* Compute its layout. */
1285 layout_type (va_list_type);
1287 return va_list_type;
1290 /* Return an expression of type "void *" pointing to the next
1291 available argument in a variable-argument list. VALIST is the
1292 user-level va_list object, of type __builtin_va_list. */
1294 arm_extract_valist_ptr (tree valist)
1296 if (TREE_TYPE (valist) == error_mark_node)
1297 return error_mark_node;
1299 /* On an AAPCS target, the pointer is stored within "struct
1301 if (TARGET_AAPCS_BASED)
1303 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1304 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1305 valist, ap_field, NULL_TREE);
1311 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1313 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1315 valist = arm_extract_valist_ptr (valist);
1316 std_expand_builtin_va_start (valist, nextarg);
1319 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1321 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1324 valist = arm_extract_valist_ptr (valist);
1325 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1328 /* Lookup NAME in SEL. */
1330 static const struct processors *
1331 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1333 if (!(name && *name))
1336 for (; sel->name != NULL; sel++)
1338 if (streq (name, sel->name))
1342 error ("bad value (%s) for %s switch", name, desc);
1346 /* Implement TARGET_HANDLE_OPTION. */
1349 arm_handle_option (struct gcc_options *opts, struct gcc_options *opts_set,
1350 const struct cl_decoded_option *decoded,
1351 location_t loc ATTRIBUTE_UNUSED)
1353 size_t code = decoded->opt_index;
1354 const char *arg = decoded->arg;
1356 gcc_assert (opts == &global_options);
1357 gcc_assert (opts_set == &global_options_set);
1362 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1366 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1370 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1379 arm_target_help (void)
1382 static int columns = 0;
1385 /* If we have not done so already, obtain the desired maximum width of
1386 the output. Note - this is a duplication of the code at the start of
1387 gcc/opts.c:print_specific_help() - the two copies should probably be
1388 replaced by a single function. */
1393 p = getenv ("COLUMNS");
1396 int value = atoi (p);
1403 /* Use a reasonable default. */
1407 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1409 /* The - 2 is because we know that the last entry in the array is NULL. */
1410 i = ARRAY_SIZE (all_cores) - 2;
1412 printf (" %s", all_cores[i].name);
1413 remaining = columns - (strlen (all_cores[i].name) + 4);
1414 gcc_assert (remaining >= 0);
1418 int len = strlen (all_cores[i].name);
1420 if (remaining > len + 2)
1422 printf (", %s", all_cores[i].name);
1423 remaining -= len + 2;
1429 printf ("\n %s", all_cores[i].name);
1430 remaining = columns - (len + 4);
1434 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1436 i = ARRAY_SIZE (all_architectures) - 2;
1439 printf (" %s", all_architectures[i].name);
1440 remaining = columns - (strlen (all_architectures[i].name) + 4);
1441 gcc_assert (remaining >= 0);
1445 int len = strlen (all_architectures[i].name);
1447 if (remaining > len + 2)
1449 printf (", %s", all_architectures[i].name);
1450 remaining -= len + 2;
1456 printf ("\n %s", all_architectures[i].name);
1457 remaining = columns - (len + 4);
1464 /* Fix up any incompatible options that the user has specified. */
1466 arm_option_override (void)
1470 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1471 SUBTARGET_OVERRIDE_OPTIONS;
1474 if (arm_selected_arch)
1476 if (arm_selected_cpu)
1478 /* Check for conflict between mcpu and march. */
1479 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1481 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1482 arm_selected_cpu->name, arm_selected_arch->name);
1483 /* -march wins for code generation.
1484 -mcpu wins for default tuning. */
1485 if (!arm_selected_tune)
1486 arm_selected_tune = arm_selected_cpu;
1488 arm_selected_cpu = arm_selected_arch;
1492 arm_selected_arch = NULL;
1495 /* Pick a CPU based on the architecture. */
1496 arm_selected_cpu = arm_selected_arch;
1499 /* If the user did not specify a processor, choose one for them. */
1500 if (!arm_selected_cpu)
1502 const struct processors * sel;
1503 unsigned int sought;
1505 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1506 if (!arm_selected_cpu->name)
1508 #ifdef SUBTARGET_CPU_DEFAULT
1509 /* Use the subtarget default CPU if none was specified by
1511 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1513 /* Default to ARM6. */
1514 if (!arm_selected_cpu->name)
1515 arm_selected_cpu = &all_cores[arm6];
1518 sel = arm_selected_cpu;
1519 insn_flags = sel->flags;
1521 /* Now check to see if the user has specified some command line
1522 switch that require certain abilities from the cpu. */
1525 if (TARGET_INTERWORK || TARGET_THUMB)
1527 sought |= (FL_THUMB | FL_MODE32);
1529 /* There are no ARM processors that support both APCS-26 and
1530 interworking. Therefore we force FL_MODE26 to be removed
1531 from insn_flags here (if it was set), so that the search
1532 below will always be able to find a compatible processor. */
1533 insn_flags &= ~FL_MODE26;
1536 if (sought != 0 && ((sought & insn_flags) != sought))
1538 /* Try to locate a CPU type that supports all of the abilities
1539 of the default CPU, plus the extra abilities requested by
1541 for (sel = all_cores; sel->name != NULL; sel++)
1542 if ((sel->flags & sought) == (sought | insn_flags))
1545 if (sel->name == NULL)
1547 unsigned current_bit_count = 0;
1548 const struct processors * best_fit = NULL;
1550 /* Ideally we would like to issue an error message here
1551 saying that it was not possible to find a CPU compatible
1552 with the default CPU, but which also supports the command
1553 line options specified by the programmer, and so they
1554 ought to use the -mcpu=<name> command line option to
1555 override the default CPU type.
1557 If we cannot find a cpu that has both the
1558 characteristics of the default cpu and the given
1559 command line options we scan the array again looking
1560 for a best match. */
1561 for (sel = all_cores; sel->name != NULL; sel++)
1562 if ((sel->flags & sought) == sought)
1566 count = bit_count (sel->flags & insn_flags);
1568 if (count >= current_bit_count)
1571 current_bit_count = count;
1575 gcc_assert (best_fit);
1579 arm_selected_cpu = sel;
1583 gcc_assert (arm_selected_cpu);
1584 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1585 if (!arm_selected_tune)
1586 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1588 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1589 insn_flags = arm_selected_cpu->flags;
1591 arm_tune = arm_selected_tune->core;
1592 tune_flags = arm_selected_tune->flags;
1593 current_tune = arm_selected_tune->tune;
1595 if (target_fp16_format_name)
1597 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1599 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1601 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1605 if (i == ARRAY_SIZE (all_fp16_formats))
1606 error ("invalid __fp16 format option: -mfp16-format=%s",
1607 target_fp16_format_name);
1610 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1612 if (target_abi_name)
1614 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1616 if (streq (arm_all_abis[i].name, target_abi_name))
1618 arm_abi = arm_all_abis[i].abi_type;
1622 if (i == ARRAY_SIZE (arm_all_abis))
1623 error ("invalid ABI option: -mabi=%s", target_abi_name);
1626 arm_abi = ARM_DEFAULT_ABI;
1628 /* Make sure that the processor choice does not conflict with any of the
1629 other command line choices. */
1630 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1631 error ("target CPU does not support ARM mode");
1633 /* BPABI targets use linker tricks to allow interworking on cores
1634 without thumb support. */
1635 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1637 warning (0, "target CPU does not support interworking" );
1638 target_flags &= ~MASK_INTERWORK;
1641 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1643 warning (0, "target CPU does not support THUMB instructions");
1644 target_flags &= ~MASK_THUMB;
1647 if (TARGET_APCS_FRAME && TARGET_THUMB)
1649 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1650 target_flags &= ~MASK_APCS_FRAME;
1653 /* Callee super interworking implies thumb interworking. Adding
1654 this to the flags here simplifies the logic elsewhere. */
1655 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1656 target_flags |= MASK_INTERWORK;
1658 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1659 from here where no function is being compiled currently. */
1660 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1661 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1663 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1664 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1666 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1668 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1669 target_flags |= MASK_APCS_FRAME;
1672 if (TARGET_POKE_FUNCTION_NAME)
1673 target_flags |= MASK_APCS_FRAME;
1675 if (TARGET_APCS_REENT && flag_pic)
1676 error ("-fpic and -mapcs-reent are incompatible");
1678 if (TARGET_APCS_REENT)
1679 warning (0, "APCS reentrant code not supported. Ignored");
1681 /* If this target is normally configured to use APCS frames, warn if they
1682 are turned off and debugging is turned on. */
1684 && write_symbols != NO_DEBUG
1685 && !TARGET_APCS_FRAME
1686 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1687 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1689 if (TARGET_APCS_FLOAT)
1690 warning (0, "passing floating point arguments in fp regs not yet supported");
1692 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1693 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1694 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1695 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1696 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1697 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1698 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1699 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1700 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1701 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1702 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1703 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1704 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1705 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1707 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1708 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1709 thumb_code = TARGET_ARM == 0;
1710 thumb1_code = TARGET_THUMB1 != 0;
1711 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1712 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1713 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1714 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1715 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1717 /* If we are not using the default (ARM mode) section anchor offset
1718 ranges, then set the correct ranges now. */
1721 /* Thumb-1 LDR instructions cannot have negative offsets.
1722 Permissible positive offset ranges are 5-bit (for byte loads),
1723 6-bit (for halfword loads), or 7-bit (for word loads).
1724 Empirical results suggest a 7-bit anchor range gives the best
1725 overall code size. */
1726 targetm.min_anchor_offset = 0;
1727 targetm.max_anchor_offset = 127;
1729 else if (TARGET_THUMB2)
1731 /* The minimum is set such that the total size of the block
1732 for a particular anchor is 248 + 1 + 4095 bytes, which is
1733 divisible by eight, ensuring natural spacing of anchors. */
1734 targetm.min_anchor_offset = -248;
1735 targetm.max_anchor_offset = 4095;
1738 /* V5 code we generate is completely interworking capable, so we turn off
1739 TARGET_INTERWORK here to avoid many tests later on. */
1741 /* XXX However, we must pass the right pre-processor defines to CPP
1742 or GLD can get confused. This is a hack. */
1743 if (TARGET_INTERWORK)
1744 arm_cpp_interwork = 1;
1747 target_flags &= ~MASK_INTERWORK;
1749 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1750 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1752 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1753 error ("iwmmxt abi requires an iwmmxt capable cpu");
1755 if (target_fpu_name == NULL && target_fpe_name != NULL)
1757 if (streq (target_fpe_name, "2"))
1758 target_fpu_name = "fpe2";
1759 else if (streq (target_fpe_name, "3"))
1760 target_fpu_name = "fpe3";
1762 error ("invalid floating point emulation option: -mfpe=%s",
1766 if (target_fpu_name == NULL)
1768 #ifdef FPUTYPE_DEFAULT
1769 target_fpu_name = FPUTYPE_DEFAULT;
1771 if (arm_arch_cirrus)
1772 target_fpu_name = "maverick";
1774 target_fpu_name = "fpe2";
1778 arm_fpu_desc = NULL;
1779 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1781 if (streq (all_fpus[i].name, target_fpu_name))
1783 arm_fpu_desc = &all_fpus[i];
1790 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1794 switch (arm_fpu_desc->model)
1796 case ARM_FP_MODEL_FPA:
1797 if (arm_fpu_desc->rev == 2)
1798 arm_fpu_attr = FPU_FPE2;
1799 else if (arm_fpu_desc->rev == 3)
1800 arm_fpu_attr = FPU_FPE3;
1802 arm_fpu_attr = FPU_FPA;
1805 case ARM_FP_MODEL_MAVERICK:
1806 arm_fpu_attr = FPU_MAVERICK;
1809 case ARM_FP_MODEL_VFP:
1810 arm_fpu_attr = FPU_VFP;
1817 if (target_float_abi_name != NULL)
1819 /* The user specified a FP ABI. */
1820 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1822 if (streq (all_float_abis[i].name, target_float_abi_name))
1824 arm_float_abi = all_float_abis[i].abi_type;
1828 if (i == ARRAY_SIZE (all_float_abis))
1829 error ("invalid floating point abi: -mfloat-abi=%s",
1830 target_float_abi_name);
1833 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1835 if (TARGET_AAPCS_BASED
1836 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1837 error ("FPA is unsupported in the AAPCS");
1839 if (TARGET_AAPCS_BASED)
1841 if (TARGET_CALLER_INTERWORKING)
1842 error ("AAPCS does not support -mcaller-super-interworking");
1844 if (TARGET_CALLEE_INTERWORKING)
1845 error ("AAPCS does not support -mcallee-super-interworking");
1848 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1849 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1850 will ever exist. GCC makes no attempt to support this combination. */
1851 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1852 sorry ("iWMMXt and hardware floating point");
1854 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1855 if (TARGET_THUMB2 && TARGET_IWMMXT)
1856 sorry ("Thumb-2 iWMMXt");
1858 /* __fp16 support currently assumes the core has ldrh. */
1859 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1860 sorry ("__fp16 and no ldrh");
1862 /* If soft-float is specified then don't use FPU. */
1863 if (TARGET_SOFT_FLOAT)
1864 arm_fpu_attr = FPU_NONE;
1866 if (TARGET_AAPCS_BASED)
1868 if (arm_abi == ARM_ABI_IWMMXT)
1869 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1870 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1871 && TARGET_HARD_FLOAT
1873 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1875 arm_pcs_default = ARM_PCS_AAPCS;
1879 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1880 sorry ("-mfloat-abi=hard and VFP");
1882 if (arm_abi == ARM_ABI_APCS)
1883 arm_pcs_default = ARM_PCS_APCS;
1885 arm_pcs_default = ARM_PCS_ATPCS;
1888 /* For arm2/3 there is no need to do any scheduling if there is only
1889 a floating point emulator, or we are doing software floating-point. */
1890 if ((TARGET_SOFT_FLOAT
1891 || (TARGET_FPA && arm_fpu_desc->rev))
1892 && (tune_flags & FL_MODE32) == 0)
1893 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1895 if (target_thread_switch)
1897 if (strcmp (target_thread_switch, "soft") == 0)
1898 target_thread_pointer = TP_SOFT;
1899 else if (strcmp (target_thread_switch, "auto") == 0)
1900 target_thread_pointer = TP_AUTO;
1901 else if (strcmp (target_thread_switch, "cp15") == 0)
1902 target_thread_pointer = TP_CP15;
1904 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1907 /* Use the cp15 method if it is available. */
1908 if (target_thread_pointer == TP_AUTO)
1910 if (arm_arch6k && !TARGET_THUMB1)
1911 target_thread_pointer = TP_CP15;
1913 target_thread_pointer = TP_SOFT;
1916 if (TARGET_HARD_TP && TARGET_THUMB1)
1917 error ("can not use -mtp=cp15 with 16-bit Thumb");
1919 /* Override the default structure alignment for AAPCS ABI. */
1920 if (TARGET_AAPCS_BASED)
1921 arm_structure_size_boundary = 8;
1923 if (structure_size_string != NULL)
1925 int size = strtol (structure_size_string, NULL, 0);
1927 if (size == 8 || size == 32
1928 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1929 arm_structure_size_boundary = size;
1931 warning (0, "structure size boundary can only be set to %s",
1932 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1935 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1937 error ("RTP PIC is incompatible with Thumb");
1941 /* If stack checking is disabled, we can use r10 as the PIC register,
1942 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1943 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1945 if (TARGET_VXWORKS_RTP)
1946 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1947 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1950 if (flag_pic && TARGET_VXWORKS_RTP)
1951 arm_pic_register = 9;
1953 if (arm_pic_register_string != NULL)
1955 int pic_register = decode_reg_name (arm_pic_register_string);
1958 warning (0, "-mpic-register= is useless without -fpic");
1960 /* Prevent the user from choosing an obviously stupid PIC register. */
1961 else if (pic_register < 0 || call_used_regs[pic_register]
1962 || pic_register == HARD_FRAME_POINTER_REGNUM
1963 || pic_register == STACK_POINTER_REGNUM
1964 || pic_register >= PC_REGNUM
1965 || (TARGET_VXWORKS_RTP
1966 && (unsigned int) pic_register != arm_pic_register))
1967 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1969 arm_pic_register = pic_register;
1972 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1973 if (fix_cm3_ldrd == 2)
1975 if (arm_selected_cpu->core == cortexm3)
1981 if (TARGET_THUMB1 && flag_schedule_insns)
1983 /* Don't warn since it's on by default in -O2. */
1984 flag_schedule_insns = 0;
1989 /* If optimizing for size, bump the number of instructions that we
1990 are prepared to conditionally execute (even on a StrongARM). */
1991 max_insns_skipped = 6;
1995 /* StrongARM has early execution of branches, so a sequence
1996 that is worth skipping is shorter. */
1997 if (arm_tune_strongarm)
1998 max_insns_skipped = 3;
2001 /* Hot/Cold partitioning is not currently supported, since we can't
2002 handle literal pool placement in that case. */
2003 if (flag_reorder_blocks_and_partition)
2005 inform (input_location,
2006 "-freorder-blocks-and-partition not supported on this architecture");
2007 flag_reorder_blocks_and_partition = 0;
2008 flag_reorder_blocks = 1;
2012 /* Hoisting PIC address calculations more aggressively provides a small,
2013 but measurable, size reduction for PIC code. Therefore, we decrease
2014 the bar for unrestricted expression hoisting to the cost of PIC address
2015 calculation, which is 2 instructions. */
2016 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2017 global_options.x_param_values,
2018 global_options_set.x_param_values);
2020 /* ARM EABI defaults to strict volatile bitfields. */
2021 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2022 flag_strict_volatile_bitfields = 1;
2024 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2025 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2026 if (flag_prefetch_loop_arrays < 0
2029 && current_tune->num_prefetch_slots > 0)
2030 flag_prefetch_loop_arrays = 1;
2032 /* Set up parameters to be used in prefetching algorithm. Do not override the
2033 defaults unless we are tuning for a core we have researched values for. */
2034 if (current_tune->num_prefetch_slots > 0)
2035 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2036 current_tune->num_prefetch_slots,
2037 global_options.x_param_values,
2038 global_options_set.x_param_values);
2039 if (current_tune->l1_cache_line_size >= 0)
2040 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2041 current_tune->l1_cache_line_size,
2042 global_options.x_param_values,
2043 global_options_set.x_param_values);
2044 if (current_tune->l1_cache_size >= 0)
2045 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2046 current_tune->l1_cache_size,
2047 global_options.x_param_values,
2048 global_options_set.x_param_values);
2050 /* Register global variables with the garbage collector. */
2051 arm_add_gc_roots ();
2055 arm_add_gc_roots (void)
2057 gcc_obstack_init(&minipool_obstack);
2058 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2061 /* A table of known ARM exception types.
2062 For use with the interrupt function attribute. */
2066 const char *const arg;
2067 const unsigned long return_value;
2071 static const isr_attribute_arg isr_attribute_args [] =
2073 { "IRQ", ARM_FT_ISR },
2074 { "irq", ARM_FT_ISR },
2075 { "FIQ", ARM_FT_FIQ },
2076 { "fiq", ARM_FT_FIQ },
2077 { "ABORT", ARM_FT_ISR },
2078 { "abort", ARM_FT_ISR },
2079 { "ABORT", ARM_FT_ISR },
2080 { "abort", ARM_FT_ISR },
2081 { "UNDEF", ARM_FT_EXCEPTION },
2082 { "undef", ARM_FT_EXCEPTION },
2083 { "SWI", ARM_FT_EXCEPTION },
2084 { "swi", ARM_FT_EXCEPTION },
2085 { NULL, ARM_FT_NORMAL }
2088 /* Returns the (interrupt) function type of the current
2089 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2091 static unsigned long
2092 arm_isr_value (tree argument)
2094 const isr_attribute_arg * ptr;
2098 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2100 /* No argument - default to IRQ. */
2101 if (argument == NULL_TREE)
2104 /* Get the value of the argument. */
2105 if (TREE_VALUE (argument) == NULL_TREE
2106 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2107 return ARM_FT_UNKNOWN;
2109 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2111 /* Check it against the list of known arguments. */
2112 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2113 if (streq (arg, ptr->arg))
2114 return ptr->return_value;
2116 /* An unrecognized interrupt type. */
2117 return ARM_FT_UNKNOWN;
2120 /* Computes the type of the current function. */
2122 static unsigned long
2123 arm_compute_func_type (void)
2125 unsigned long type = ARM_FT_UNKNOWN;
2129 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2131 /* Decide if the current function is volatile. Such functions
2132 never return, and many memory cycles can be saved by not storing
2133 register values that will never be needed again. This optimization
2134 was added to speed up context switching in a kernel application. */
2136 && (TREE_NOTHROW (current_function_decl)
2137 || !(flag_unwind_tables
2139 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2140 && TREE_THIS_VOLATILE (current_function_decl))
2141 type |= ARM_FT_VOLATILE;
2143 if (cfun->static_chain_decl != NULL)
2144 type |= ARM_FT_NESTED;
2146 attr = DECL_ATTRIBUTES (current_function_decl);
2148 a = lookup_attribute ("naked", attr);
2150 type |= ARM_FT_NAKED;
2152 a = lookup_attribute ("isr", attr);
2154 a = lookup_attribute ("interrupt", attr);
2157 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2159 type |= arm_isr_value (TREE_VALUE (a));
2164 /* Returns the type of the current function. */
2167 arm_current_func_type (void)
2169 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2170 cfun->machine->func_type = arm_compute_func_type ();
2172 return cfun->machine->func_type;
2176 arm_allocate_stack_slots_for_args (void)
2178 /* Naked functions should not allocate stack slots for arguments. */
2179 return !IS_NAKED (arm_current_func_type ());
2183 /* Output assembler code for a block containing the constant parts
2184 of a trampoline, leaving space for the variable parts.
2186 On the ARM, (if r8 is the static chain regnum, and remembering that
2187 referencing pc adds an offset of 8) the trampoline looks like:
2190 .word static chain value
2191 .word function's address
2192 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2195 arm_asm_trampoline_template (FILE *f)
2199 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2200 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2202 else if (TARGET_THUMB2)
2204 /* The Thumb-2 trampoline is similar to the arm implementation.
2205 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2206 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2207 STATIC_CHAIN_REGNUM, PC_REGNUM);
2208 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2212 ASM_OUTPUT_ALIGN (f, 2);
2213 fprintf (f, "\t.code\t16\n");
2214 fprintf (f, ".Ltrampoline_start:\n");
2215 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2216 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2217 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2218 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2219 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2220 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2222 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2223 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2226 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2229 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2231 rtx fnaddr, mem, a_tramp;
2233 emit_block_move (m_tramp, assemble_trampoline_template (),
2234 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2236 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2237 emit_move_insn (mem, chain_value);
2239 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2240 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2241 emit_move_insn (mem, fnaddr);
2243 a_tramp = XEXP (m_tramp, 0);
2244 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2245 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2246 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2249 /* Thumb trampolines should be entered in thumb mode, so set
2250 the bottom bit of the address. */
2253 arm_trampoline_adjust_address (rtx addr)
2256 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2257 NULL, 0, OPTAB_LIB_WIDEN);
2261 /* Return 1 if it is possible to return using a single instruction.
2262 If SIBLING is non-null, this is a test for a return before a sibling
2263 call. SIBLING is the call insn, so we can examine its register usage. */
2266 use_return_insn (int iscond, rtx sibling)
2269 unsigned int func_type;
2270 unsigned long saved_int_regs;
2271 unsigned HOST_WIDE_INT stack_adjust;
2272 arm_stack_offsets *offsets;
2274 /* Never use a return instruction before reload has run. */
2275 if (!reload_completed)
2278 func_type = arm_current_func_type ();
2280 /* Naked, volatile and stack alignment functions need special
2282 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2285 /* So do interrupt functions that use the frame pointer and Thumb
2286 interrupt functions. */
2287 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2290 offsets = arm_get_frame_offsets ();
2291 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2293 /* As do variadic functions. */
2294 if (crtl->args.pretend_args_size
2295 || cfun->machine->uses_anonymous_args
2296 /* Or if the function calls __builtin_eh_return () */
2297 || crtl->calls_eh_return
2298 /* Or if the function calls alloca */
2299 || cfun->calls_alloca
2300 /* Or if there is a stack adjustment. However, if the stack pointer
2301 is saved on the stack, we can use a pre-incrementing stack load. */
2302 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2303 && stack_adjust == 4)))
2306 saved_int_regs = offsets->saved_regs_mask;
2308 /* Unfortunately, the insn
2310 ldmib sp, {..., sp, ...}
2312 triggers a bug on most SA-110 based devices, such that the stack
2313 pointer won't be correctly restored if the instruction takes a
2314 page fault. We work around this problem by popping r3 along with
2315 the other registers, since that is never slower than executing
2316 another instruction.
2318 We test for !arm_arch5 here, because code for any architecture
2319 less than this could potentially be run on one of the buggy
2321 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2323 /* Validate that r3 is a call-clobbered register (always true in
2324 the default abi) ... */
2325 if (!call_used_regs[3])
2328 /* ... that it isn't being used for a return value ... */
2329 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2332 /* ... or for a tail-call argument ... */
2335 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2337 if (find_regno_fusage (sibling, USE, 3))
2341 /* ... and that there are no call-saved registers in r0-r2
2342 (always true in the default ABI). */
2343 if (saved_int_regs & 0x7)
2347 /* Can't be done if interworking with Thumb, and any registers have been
2349 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2352 /* On StrongARM, conditional returns are expensive if they aren't
2353 taken and multiple registers have been stacked. */
2354 if (iscond && arm_tune_strongarm)
2356 /* Conditional return when just the LR is stored is a simple
2357 conditional-load instruction, that's not expensive. */
2358 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2362 && arm_pic_register != INVALID_REGNUM
2363 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2367 /* If there are saved registers but the LR isn't saved, then we need
2368 two instructions for the return. */
2369 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2372 /* Can't be done if any of the FPA regs are pushed,
2373 since this also requires an insn. */
2374 if (TARGET_HARD_FLOAT && TARGET_FPA)
2375 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2376 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2379 /* Likewise VFP regs. */
2380 if (TARGET_HARD_FLOAT && TARGET_VFP)
2381 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2382 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2385 if (TARGET_REALLY_IWMMXT)
2386 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2387 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2393 /* Return TRUE if int I is a valid immediate ARM constant. */
2396 const_ok_for_arm (HOST_WIDE_INT i)
2400 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2401 be all zero, or all one. */
2402 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2403 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2404 != ((~(unsigned HOST_WIDE_INT) 0)
2405 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2408 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2410 /* Fast return for 0 and small values. We must do this for zero, since
2411 the code below can't handle that one case. */
2412 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2415 /* Get the number of trailing zeros. */
2416 lowbit = ffs((int) i) - 1;
2418 /* Only even shifts are allowed in ARM mode so round down to the
2419 nearest even number. */
2423 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2428 /* Allow rotated constants in ARM mode. */
2430 && ((i & ~0xc000003f) == 0
2431 || (i & ~0xf000000f) == 0
2432 || (i & ~0xfc000003) == 0))
2439 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2442 if (i == v || i == (v | (v << 8)))
2445 /* Allow repeated pattern 0xXY00XY00. */
2455 /* Return true if I is a valid constant for the operation CODE. */
2457 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2459 if (const_ok_for_arm (i))
2483 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2485 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2491 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2495 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2502 /* Emit a sequence of insns to handle a large constant.
2503 CODE is the code of the operation required, it can be any of SET, PLUS,
2504 IOR, AND, XOR, MINUS;
2505 MODE is the mode in which the operation is being performed;
2506 VAL is the integer to operate on;
2507 SOURCE is the other operand (a register, or a null-pointer for SET);
2508 SUBTARGETS means it is safe to create scratch registers if that will
2509 either produce a simpler sequence, or we will want to cse the values.
2510 Return value is the number of insns emitted. */
2512 /* ??? Tweak this for thumb2. */
2514 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2515 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2519 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2520 cond = COND_EXEC_TEST (PATTERN (insn));
2524 if (subtargets || code == SET
2525 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2526 && REGNO (target) != REGNO (source)))
2528 /* After arm_reorg has been called, we can't fix up expensive
2529 constants by pushing them into memory so we must synthesize
2530 them in-line, regardless of the cost. This is only likely to
2531 be more costly on chips that have load delay slots and we are
2532 compiling without running the scheduler (so no splitting
2533 occurred before the final instruction emission).
2535 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2537 if (!after_arm_reorg
2539 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2541 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2546 /* Currently SET is the only monadic value for CODE, all
2547 the rest are diadic. */
2548 if (TARGET_USE_MOVT)
2549 arm_emit_movpair (target, GEN_INT (val));
2551 emit_set_insn (target, GEN_INT (val));
2557 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2559 if (TARGET_USE_MOVT)
2560 arm_emit_movpair (temp, GEN_INT (val));
2562 emit_set_insn (temp, GEN_INT (val));
2564 /* For MINUS, the value is subtracted from, since we never
2565 have subtraction of a constant. */
2567 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2569 emit_set_insn (target,
2570 gen_rtx_fmt_ee (code, mode, source, temp));
2576 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2580 /* Return the number of instructions required to synthesize the given
2581 constant, if we start emitting them from bit-position I. */
2583 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2585 HOST_WIDE_INT temp1;
2586 int step_size = TARGET_ARM ? 2 : 1;
2589 gcc_assert (TARGET_ARM || i == 0);
2597 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2602 temp1 = remainder & ((0x0ff << end)
2603 | ((i < end) ? (0xff >> (32 - end)) : 0));
2604 remainder &= ~temp1;
2609 } while (remainder);
2614 find_best_start (unsigned HOST_WIDE_INT remainder)
2616 int best_consecutive_zeros = 0;
2620 /* If we aren't targetting ARM, the best place to start is always at
2625 for (i = 0; i < 32; i += 2)
2627 int consecutive_zeros = 0;
2629 if (!(remainder & (3 << i)))
2631 while ((i < 32) && !(remainder & (3 << i)))
2633 consecutive_zeros += 2;
2636 if (consecutive_zeros > best_consecutive_zeros)
2638 best_consecutive_zeros = consecutive_zeros;
2639 best_start = i - consecutive_zeros;
2645 /* So long as it won't require any more insns to do so, it's
2646 desirable to emit a small constant (in bits 0...9) in the last
2647 insn. This way there is more chance that it can be combined with
2648 a later addressing insn to form a pre-indexed load or store
2649 operation. Consider:
2651 *((volatile int *)0xe0000100) = 1;
2652 *((volatile int *)0xe0000110) = 2;
2654 We want this to wind up as:
2658 str rB, [rA, #0x100]
2660 str rB, [rA, #0x110]
2662 rather than having to synthesize both large constants from scratch.
2664 Therefore, we calculate how many insns would be required to emit
2665 the constant starting from `best_start', and also starting from
2666 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2667 yield a shorter sequence, we may as well use zero. */
2669 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2670 && (count_insns_for_constant (remainder, 0) <=
2671 count_insns_for_constant (remainder, best_start)))
2677 /* Emit an instruction with the indicated PATTERN. If COND is
2678 non-NULL, conditionalize the execution of the instruction on COND
2682 emit_constant_insn (rtx cond, rtx pattern)
2685 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2686 emit_insn (pattern);
2689 /* As above, but extra parameter GENERATE which, if clear, suppresses
2691 /* ??? This needs more work for thumb2. */
2694 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2695 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2700 int final_invert = 0;
2701 int can_negate_initial = 0;
2703 int num_bits_set = 0;
2704 int set_sign_bit_copies = 0;
2705 int clear_sign_bit_copies = 0;
2706 int clear_zero_bit_copies = 0;
2707 int set_zero_bit_copies = 0;
2709 unsigned HOST_WIDE_INT temp1, temp2;
2710 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2711 int step_size = TARGET_ARM ? 2 : 1;
2713 /* Find out which operations are safe for a given CODE. Also do a quick
2714 check for degenerate cases; these can occur when DImode operations
2725 can_negate_initial = 1;
2729 if (remainder == 0xffffffff)
2732 emit_constant_insn (cond,
2733 gen_rtx_SET (VOIDmode, target,
2734 GEN_INT (ARM_SIGN_EXTEND (val))));
2740 if (reload_completed && rtx_equal_p (target, source))
2744 emit_constant_insn (cond,
2745 gen_rtx_SET (VOIDmode, target, source));
2757 emit_constant_insn (cond,
2758 gen_rtx_SET (VOIDmode, target, const0_rtx));
2761 if (remainder == 0xffffffff)
2763 if (reload_completed && rtx_equal_p (target, source))
2766 emit_constant_insn (cond,
2767 gen_rtx_SET (VOIDmode, target, source));
2776 if (reload_completed && rtx_equal_p (target, source))
2779 emit_constant_insn (cond,
2780 gen_rtx_SET (VOIDmode, target, source));
2784 if (remainder == 0xffffffff)
2787 emit_constant_insn (cond,
2788 gen_rtx_SET (VOIDmode, target,
2789 gen_rtx_NOT (mode, source)));
2795 /* We treat MINUS as (val - source), since (source - val) is always
2796 passed as (source + (-val)). */
2800 emit_constant_insn (cond,
2801 gen_rtx_SET (VOIDmode, target,
2802 gen_rtx_NEG (mode, source)));
2805 if (const_ok_for_arm (val))
2808 emit_constant_insn (cond,
2809 gen_rtx_SET (VOIDmode, target,
2810 gen_rtx_MINUS (mode, GEN_INT (val),
2822 /* If we can do it in one insn get out quickly. */
2823 if (const_ok_for_arm (val)
2824 || (can_negate_initial && const_ok_for_arm (-val))
2825 || (can_invert && const_ok_for_arm (~val)))
2828 emit_constant_insn (cond,
2829 gen_rtx_SET (VOIDmode, target,
2831 ? gen_rtx_fmt_ee (code, mode, source,
2837 /* Calculate a few attributes that may be useful for specific
2839 /* Count number of leading zeros. */
2840 for (i = 31; i >= 0; i--)
2842 if ((remainder & (1 << i)) == 0)
2843 clear_sign_bit_copies++;
2848 /* Count number of leading 1's. */
2849 for (i = 31; i >= 0; i--)
2851 if ((remainder & (1 << i)) != 0)
2852 set_sign_bit_copies++;
2857 /* Count number of trailing zero's. */
2858 for (i = 0; i <= 31; i++)
2860 if ((remainder & (1 << i)) == 0)
2861 clear_zero_bit_copies++;
2866 /* Count number of trailing 1's. */
2867 for (i = 0; i <= 31; i++)
2869 if ((remainder & (1 << i)) != 0)
2870 set_zero_bit_copies++;
2878 /* See if we can use movw. */
2879 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2882 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2887 /* See if we can do this by sign_extending a constant that is known
2888 to be negative. This is a good, way of doing it, since the shift
2889 may well merge into a subsequent insn. */
2890 if (set_sign_bit_copies > 1)
2892 if (const_ok_for_arm
2893 (temp1 = ARM_SIGN_EXTEND (remainder
2894 << (set_sign_bit_copies - 1))))
2898 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2899 emit_constant_insn (cond,
2900 gen_rtx_SET (VOIDmode, new_src,
2902 emit_constant_insn (cond,
2903 gen_ashrsi3 (target, new_src,
2904 GEN_INT (set_sign_bit_copies - 1)));
2908 /* For an inverted constant, we will need to set the low bits,
2909 these will be shifted out of harm's way. */
2910 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2911 if (const_ok_for_arm (~temp1))
2915 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2916 emit_constant_insn (cond,
2917 gen_rtx_SET (VOIDmode, new_src,
2919 emit_constant_insn (cond,
2920 gen_ashrsi3 (target, new_src,
2921 GEN_INT (set_sign_bit_copies - 1)));
2927 /* See if we can calculate the value as the difference between two
2928 valid immediates. */
2929 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2931 int topshift = clear_sign_bit_copies & ~1;
2933 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2934 & (0xff000000 >> topshift));
2936 /* If temp1 is zero, then that means the 9 most significant
2937 bits of remainder were 1 and we've caused it to overflow.
2938 When topshift is 0 we don't need to do anything since we
2939 can borrow from 'bit 32'. */
2940 if (temp1 == 0 && topshift != 0)
2941 temp1 = 0x80000000 >> (topshift - 1);
2943 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2945 if (const_ok_for_arm (temp2))
2949 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2950 emit_constant_insn (cond,
2951 gen_rtx_SET (VOIDmode, new_src,
2953 emit_constant_insn (cond,
2954 gen_addsi3 (target, new_src,
2962 /* See if we can generate this by setting the bottom (or the top)
2963 16 bits, and then shifting these into the other half of the
2964 word. We only look for the simplest cases, to do more would cost
2965 too much. Be careful, however, not to generate this when the
2966 alternative would take fewer insns. */
2967 if (val & 0xffff0000)
2969 temp1 = remainder & 0xffff0000;
2970 temp2 = remainder & 0x0000ffff;
2972 /* Overlaps outside this range are best done using other methods. */
2973 for (i = 9; i < 24; i++)
2975 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2976 && !const_ok_for_arm (temp2))
2978 rtx new_src = (subtargets
2979 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2981 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2982 source, subtargets, generate);
2990 gen_rtx_ASHIFT (mode, source,
2997 /* Don't duplicate cases already considered. */
2998 for (i = 17; i < 24; i++)
3000 if (((temp1 | (temp1 >> i)) == remainder)
3001 && !const_ok_for_arm (temp1))
3003 rtx new_src = (subtargets
3004 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3006 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3007 source, subtargets, generate);
3012 gen_rtx_SET (VOIDmode, target,
3015 gen_rtx_LSHIFTRT (mode, source,
3026 /* If we have IOR or XOR, and the constant can be loaded in a
3027 single instruction, and we can find a temporary to put it in,
3028 then this can be done in two instructions instead of 3-4. */
3030 /* TARGET can't be NULL if SUBTARGETS is 0 */
3031 || (reload_completed && !reg_mentioned_p (target, source)))
3033 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3037 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3039 emit_constant_insn (cond,
3040 gen_rtx_SET (VOIDmode, sub,
3042 emit_constant_insn (cond,
3043 gen_rtx_SET (VOIDmode, target,
3044 gen_rtx_fmt_ee (code, mode,
3055 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3056 and the remainder 0s for e.g. 0xfff00000)
3057 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3059 This can be done in 2 instructions by using shifts with mov or mvn.
3064 mvn r0, r0, lsr #12 */
3065 if (set_sign_bit_copies > 8
3066 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3070 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3071 rtx shift = GEN_INT (set_sign_bit_copies);
3075 gen_rtx_SET (VOIDmode, sub,
3077 gen_rtx_ASHIFT (mode,
3082 gen_rtx_SET (VOIDmode, target,
3084 gen_rtx_LSHIFTRT (mode, sub,
3091 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3093 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3095 For eg. r0 = r0 | 0xfff
3100 if (set_zero_bit_copies > 8
3101 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3105 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3106 rtx shift = GEN_INT (set_zero_bit_copies);
3110 gen_rtx_SET (VOIDmode, sub,
3112 gen_rtx_LSHIFTRT (mode,
3117 gen_rtx_SET (VOIDmode, target,
3119 gen_rtx_ASHIFT (mode, sub,
3125 /* This will never be reached for Thumb2 because orn is a valid
3126 instruction. This is for Thumb1 and the ARM 32 bit cases.
3128 x = y | constant (such that ~constant is a valid constant)
3130 x = ~(~y & ~constant).
3132 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3136 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3137 emit_constant_insn (cond,
3138 gen_rtx_SET (VOIDmode, sub,
3139 gen_rtx_NOT (mode, source)));
3142 sub = gen_reg_rtx (mode);
3143 emit_constant_insn (cond,
3144 gen_rtx_SET (VOIDmode, sub,
3145 gen_rtx_AND (mode, source,
3147 emit_constant_insn (cond,
3148 gen_rtx_SET (VOIDmode, target,
3149 gen_rtx_NOT (mode, sub)));
3156 /* See if two shifts will do 2 or more insn's worth of work. */
3157 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3159 HOST_WIDE_INT shift_mask = ((0xffffffff
3160 << (32 - clear_sign_bit_copies))
3163 if ((remainder | shift_mask) != 0xffffffff)
3167 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3168 insns = arm_gen_constant (AND, mode, cond,
3169 remainder | shift_mask,
3170 new_src, source, subtargets, 1);
3175 rtx targ = subtargets ? NULL_RTX : target;
3176 insns = arm_gen_constant (AND, mode, cond,
3177 remainder | shift_mask,
3178 targ, source, subtargets, 0);
3184 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3185 rtx shift = GEN_INT (clear_sign_bit_copies);
3187 emit_insn (gen_ashlsi3 (new_src, source, shift));
3188 emit_insn (gen_lshrsi3 (target, new_src, shift));
3194 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3196 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3198 if ((remainder | shift_mask) != 0xffffffff)
3202 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3204 insns = arm_gen_constant (AND, mode, cond,
3205 remainder | shift_mask,
3206 new_src, source, subtargets, 1);
3211 rtx targ = subtargets ? NULL_RTX : target;
3213 insns = arm_gen_constant (AND, mode, cond,
3214 remainder | shift_mask,
3215 targ, source, subtargets, 0);
3221 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3222 rtx shift = GEN_INT (clear_zero_bit_copies);
3224 emit_insn (gen_lshrsi3 (new_src, source, shift));
3225 emit_insn (gen_ashlsi3 (target, new_src, shift));
3237 for (i = 0; i < 32; i++)
3238 if (remainder & (1 << i))
3242 || (code != IOR && can_invert && num_bits_set > 16))
3243 remainder ^= 0xffffffff;
3244 else if (code == PLUS && num_bits_set > 16)
3245 remainder = (-remainder) & 0xffffffff;
3247 /* For XOR, if more than half the bits are set and there's a sequence
3248 of more than 8 consecutive ones in the pattern then we can XOR by the
3249 inverted constant and then invert the final result; this may save an
3250 instruction and might also lead to the final mvn being merged with
3251 some other operation. */
3252 else if (code == XOR && num_bits_set > 16
3253 && (count_insns_for_constant (remainder ^ 0xffffffff,
3255 (remainder ^ 0xffffffff))
3256 < count_insns_for_constant (remainder,
3257 find_best_start (remainder))))
3259 remainder ^= 0xffffffff;
3268 /* Now try and find a way of doing the job in either two or three
3270 We start by looking for the largest block of zeros that are aligned on
3271 a 2-bit boundary, we then fill up the temps, wrapping around to the
3272 top of the word when we drop off the bottom.
3273 In the worst case this code should produce no more than four insns.
3274 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3275 best place to start. */
3277 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3280 /* Now start emitting the insns. */
3281 i = find_best_start (remainder);
3288 if (remainder & (3 << (i - 2)))
3293 temp1 = remainder & ((0x0ff << end)
3294 | ((i < end) ? (0xff >> (32 - end)) : 0));
3295 remainder &= ~temp1;
3299 rtx new_src, temp1_rtx;
3301 if (code == SET || code == MINUS)
3303 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3304 if (can_invert && code != MINUS)
3309 if ((final_invert || remainder) && subtargets)
3310 new_src = gen_reg_rtx (mode);
3315 else if (can_negate)
3319 temp1 = trunc_int_for_mode (temp1, mode);
3320 temp1_rtx = GEN_INT (temp1);
3324 else if (code == MINUS)
3325 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3327 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3329 emit_constant_insn (cond,
3330 gen_rtx_SET (VOIDmode, new_src,
3340 else if (code == MINUS)
3346 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3356 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3357 gen_rtx_NOT (mode, source)));
3364 /* Canonicalize a comparison so that we are more likely to recognize it.
3365 This can be done for a few constant compares, where we can make the
3366 immediate value easier to load. */
3369 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3371 enum machine_mode mode;
3372 unsigned HOST_WIDE_INT i, maxval;
3374 mode = GET_MODE (*op0);
3375 if (mode == VOIDmode)
3376 mode = GET_MODE (*op1);
3378 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3380 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3381 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3382 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3383 for GTU/LEU in Thumb mode. */
3388 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3390 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3393 if (code == GT || code == LE
3394 || (!TARGET_ARM && (code == GTU || code == LEU)))
3396 /* Missing comparison. First try to use an available
3398 if (GET_CODE (*op1) == CONST_INT)
3406 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3408 *op1 = GEN_INT (i + 1);
3409 return code == GT ? GE : LT;
3414 if (i != ~((unsigned HOST_WIDE_INT) 0)
3415 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3417 *op1 = GEN_INT (i + 1);
3418 return code == GTU ? GEU : LTU;
3426 /* If that did not work, reverse the condition. */
3430 return swap_condition (code);
3436 /* Comparisons smaller than DImode. Only adjust comparisons against
3437 an out-of-range constant. */
3438 if (GET_CODE (*op1) != CONST_INT
3439 || const_ok_for_arm (INTVAL (*op1))
3440 || const_ok_for_arm (- INTVAL (*op1)))
3454 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3456 *op1 = GEN_INT (i + 1);
3457 return code == GT ? GE : LT;
3464 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3466 *op1 = GEN_INT (i - 1);
3467 return code == GE ? GT : LE;
3473 if (i != ~((unsigned HOST_WIDE_INT) 0)
3474 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3476 *op1 = GEN_INT (i + 1);
3477 return code == GTU ? GEU : LTU;
3484 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3486 *op1 = GEN_INT (i - 1);
3487 return code == GEU ? GTU : LEU;
3499 /* Define how to find the value returned by a function. */
3502 arm_function_value(const_tree type, const_tree func,
3503 bool outgoing ATTRIBUTE_UNUSED)
3505 enum machine_mode mode;
3506 int unsignedp ATTRIBUTE_UNUSED;
3507 rtx r ATTRIBUTE_UNUSED;
3509 mode = TYPE_MODE (type);
3511 if (TARGET_AAPCS_BASED)
3512 return aapcs_allocate_return_reg (mode, type, func);
3514 /* Promote integer types. */
3515 if (INTEGRAL_TYPE_P (type))
3516 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3518 /* Promotes small structs returned in a register to full-word size
3519 for big-endian AAPCS. */
3520 if (arm_return_in_msb (type))
3522 HOST_WIDE_INT size = int_size_in_bytes (type);
3523 if (size % UNITS_PER_WORD != 0)
3525 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3526 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3530 return LIBCALL_VALUE (mode);
3534 libcall_eq (const void *p1, const void *p2)
3536 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3540 libcall_hash (const void *p1)
3542 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3546 add_libcall (htab_t htab, rtx libcall)
3548 *htab_find_slot (htab, libcall, INSERT) = libcall;
3552 arm_libcall_uses_aapcs_base (const_rtx libcall)
3554 static bool init_done = false;
3555 static htab_t libcall_htab;
3561 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3563 add_libcall (libcall_htab,
3564 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3565 add_libcall (libcall_htab,
3566 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3567 add_libcall (libcall_htab,
3568 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3569 add_libcall (libcall_htab,
3570 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3572 add_libcall (libcall_htab,
3573 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3574 add_libcall (libcall_htab,
3575 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3576 add_libcall (libcall_htab,
3577 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3578 add_libcall (libcall_htab,
3579 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3581 add_libcall (libcall_htab,
3582 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3583 add_libcall (libcall_htab,
3584 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3585 add_libcall (libcall_htab,
3586 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3587 add_libcall (libcall_htab,
3588 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3589 add_libcall (libcall_htab,
3590 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3591 add_libcall (libcall_htab,
3592 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3595 return libcall && htab_find (libcall_htab, libcall) != NULL;
3599 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3601 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3602 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3604 /* The following libcalls return their result in integer registers,
3605 even though they return a floating point value. */
3606 if (arm_libcall_uses_aapcs_base (libcall))
3607 return gen_rtx_REG (mode, ARG_REGISTER(1));
3611 return LIBCALL_VALUE (mode);
3614 /* Determine the amount of memory needed to store the possible return
3615 registers of an untyped call. */
3617 arm_apply_result_size (void)
3623 if (TARGET_HARD_FLOAT_ABI)
3629 if (TARGET_MAVERICK)
3632 if (TARGET_IWMMXT_ABI)
3639 /* Decide whether TYPE should be returned in memory (true)
3640 or in a register (false). FNTYPE is the type of the function making
3643 arm_return_in_memory (const_tree type, const_tree fntype)
3647 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3649 if (TARGET_AAPCS_BASED)
3651 /* Simple, non-aggregate types (ie not including vectors and
3652 complex) are always returned in a register (or registers).
3653 We don't care about which register here, so we can short-cut
3654 some of the detail. */
3655 if (!AGGREGATE_TYPE_P (type)
3656 && TREE_CODE (type) != VECTOR_TYPE
3657 && TREE_CODE (type) != COMPLEX_TYPE)
3660 /* Any return value that is no larger than one word can be
3662 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3665 /* Check any available co-processors to see if they accept the
3666 type as a register candidate (VFP, for example, can return
3667 some aggregates in consecutive registers). These aren't
3668 available if the call is variadic. */
3669 if (aapcs_select_return_coproc (type, fntype) >= 0)
3672 /* Vector values should be returned using ARM registers, not
3673 memory (unless they're over 16 bytes, which will break since
3674 we only have four call-clobbered registers to play with). */
3675 if (TREE_CODE (type) == VECTOR_TYPE)
3676 return (size < 0 || size > (4 * UNITS_PER_WORD));
3678 /* The rest go in memory. */
3682 if (TREE_CODE (type) == VECTOR_TYPE)
3683 return (size < 0 || size > (4 * UNITS_PER_WORD));
3685 if (!AGGREGATE_TYPE_P (type) &&
3686 (TREE_CODE (type) != VECTOR_TYPE))
3687 /* All simple types are returned in registers. */
3690 if (arm_abi != ARM_ABI_APCS)
3692 /* ATPCS and later return aggregate types in memory only if they are
3693 larger than a word (or are variable size). */
3694 return (size < 0 || size > UNITS_PER_WORD);
3697 /* For the arm-wince targets we choose to be compatible with Microsoft's
3698 ARM and Thumb compilers, which always return aggregates in memory. */
3700 /* All structures/unions bigger than one word are returned in memory.
3701 Also catch the case where int_size_in_bytes returns -1. In this case
3702 the aggregate is either huge or of variable size, and in either case
3703 we will want to return it via memory and not in a register. */
3704 if (size < 0 || size > UNITS_PER_WORD)
3707 if (TREE_CODE (type) == RECORD_TYPE)
3711 /* For a struct the APCS says that we only return in a register
3712 if the type is 'integer like' and every addressable element
3713 has an offset of zero. For practical purposes this means
3714 that the structure can have at most one non bit-field element
3715 and that this element must be the first one in the structure. */
3717 /* Find the first field, ignoring non FIELD_DECL things which will
3718 have been created by C++. */
3719 for (field = TYPE_FIELDS (type);
3720 field && TREE_CODE (field) != FIELD_DECL;
3721 field = DECL_CHAIN (field))
3725 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3727 /* Check that the first field is valid for returning in a register. */
3729 /* ... Floats are not allowed */
3730 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3733 /* ... Aggregates that are not themselves valid for returning in
3734 a register are not allowed. */
3735 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3738 /* Now check the remaining fields, if any. Only bitfields are allowed,
3739 since they are not addressable. */
3740 for (field = DECL_CHAIN (field);
3742 field = DECL_CHAIN (field))
3744 if (TREE_CODE (field) != FIELD_DECL)
3747 if (!DECL_BIT_FIELD_TYPE (field))
3754 if (TREE_CODE (type) == UNION_TYPE)
3758 /* Unions can be returned in registers if every element is
3759 integral, or can be returned in an integer register. */
3760 for (field = TYPE_FIELDS (type);
3762 field = DECL_CHAIN (field))
3764 if (TREE_CODE (field) != FIELD_DECL)
3767 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3770 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3776 #endif /* not ARM_WINCE */
3778 /* Return all other types in memory. */
3782 /* Indicate whether or not words of a double are in big-endian order. */
3785 arm_float_words_big_endian (void)
3787 if (TARGET_MAVERICK)
3790 /* For FPA, float words are always big-endian. For VFP, floats words
3791 follow the memory system mode. */
3799 return (TARGET_BIG_END ? 1 : 0);
3804 const struct pcs_attribute_arg
3808 } pcs_attribute_args[] =
3810 {"aapcs", ARM_PCS_AAPCS},
3811 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3813 /* We could recognize these, but changes would be needed elsewhere
3814 * to implement them. */
3815 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3816 {"atpcs", ARM_PCS_ATPCS},
3817 {"apcs", ARM_PCS_APCS},
3819 {NULL, ARM_PCS_UNKNOWN}
3823 arm_pcs_from_attribute (tree attr)
3825 const struct pcs_attribute_arg *ptr;
3828 /* Get the value of the argument. */
3829 if (TREE_VALUE (attr) == NULL_TREE
3830 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3831 return ARM_PCS_UNKNOWN;
3833 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3835 /* Check it against the list of known arguments. */
3836 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3837 if (streq (arg, ptr->arg))
3840 /* An unrecognized interrupt type. */
3841 return ARM_PCS_UNKNOWN;
3844 /* Get the PCS variant to use for this call. TYPE is the function's type
3845 specification, DECL is the specific declartion. DECL may be null if
3846 the call could be indirect or if this is a library call. */
3848 arm_get_pcs_model (const_tree type, const_tree decl)
3850 bool user_convention = false;
3851 enum arm_pcs user_pcs = arm_pcs_default;
3856 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3859 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3860 user_convention = true;
3863 if (TARGET_AAPCS_BASED)
3865 /* Detect varargs functions. These always use the base rules
3866 (no argument is ever a candidate for a co-processor
3868 bool base_rules = stdarg_p (type);
3870 if (user_convention)
3872 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3873 sorry ("non-AAPCS derived PCS variant");
3874 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3875 error ("variadic functions must use the base AAPCS variant");
3879 return ARM_PCS_AAPCS;
3880 else if (user_convention)
3882 else if (decl && flag_unit_at_a_time)
3884 /* Local functions never leak outside this compilation unit,
3885 so we are free to use whatever conventions are
3887 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3888 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3890 return ARM_PCS_AAPCS_LOCAL;
3893 else if (user_convention && user_pcs != arm_pcs_default)
3894 sorry ("PCS variant");
3896 /* For everything else we use the target's default. */
3897 return arm_pcs_default;
3902 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3903 const_tree fntype ATTRIBUTE_UNUSED,
3904 rtx libcall ATTRIBUTE_UNUSED,
3905 const_tree fndecl ATTRIBUTE_UNUSED)
3907 /* Record the unallocated VFP registers. */
3908 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3909 pcum->aapcs_vfp_reg_alloc = 0;
3912 /* Walk down the type tree of TYPE counting consecutive base elements.
3913 If *MODEP is VOIDmode, then set it to the first valid floating point
3914 type. If a non-floating point type is found, or if a floating point
3915 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3916 otherwise return the count in the sub-tree. */
3918 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3920 enum machine_mode mode;
3923 switch (TREE_CODE (type))
3926 mode = TYPE_MODE (type);
3927 if (mode != DFmode && mode != SFmode)
3930 if (*modep == VOIDmode)
3939 mode = TYPE_MODE (TREE_TYPE (type));
3940 if (mode != DFmode && mode != SFmode)
3943 if (*modep == VOIDmode)
3952 /* Use V2SImode and V4SImode as representatives of all 64-bit
3953 and 128-bit vector types, whether or not those modes are
3954 supported with the present options. */
3955 size = int_size_in_bytes (type);
3968 if (*modep == VOIDmode)
3971 /* Vector modes are considered to be opaque: two vectors are
3972 equivalent for the purposes of being homogeneous aggregates
3973 if they are the same size. */
3982 tree index = TYPE_DOMAIN (type);
3984 /* Can't handle incomplete types. */
3985 if (!COMPLETE_TYPE_P(type))
3988 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3991 || !TYPE_MAX_VALUE (index)
3992 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3993 || !TYPE_MIN_VALUE (index)
3994 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3998 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3999 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4001 /* There must be no padding. */
4002 if (!host_integerp (TYPE_SIZE (type), 1)
4003 || (tree_low_cst (TYPE_SIZE (type), 1)
4004 != count * GET_MODE_BITSIZE (*modep)))
4016 /* Can't handle incomplete types. */
4017 if (!COMPLETE_TYPE_P(type))
4020 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4022 if (TREE_CODE (field) != FIELD_DECL)
4025 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4031 /* There must be no padding. */
4032 if (!host_integerp (TYPE_SIZE (type), 1)
4033 || (tree_low_cst (TYPE_SIZE (type), 1)
4034 != count * GET_MODE_BITSIZE (*modep)))
4041 case QUAL_UNION_TYPE:
4043 /* These aren't very interesting except in a degenerate case. */
4048 /* Can't handle incomplete types. */
4049 if (!COMPLETE_TYPE_P(type))
4052 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4054 if (TREE_CODE (field) != FIELD_DECL)
4057 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4060 count = count > sub_count ? count : sub_count;
4063 /* There must be no padding. */
4064 if (!host_integerp (TYPE_SIZE (type), 1)
4065 || (tree_low_cst (TYPE_SIZE (type), 1)
4066 != count * GET_MODE_BITSIZE (*modep)))
4079 /* Return true if PCS_VARIANT should use VFP registers. */
4081 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4083 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4085 static bool seen_thumb1_vfp = false;
4087 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4089 sorry ("Thumb-1 hard-float VFP ABI");
4090 /* sorry() is not immediately fatal, so only display this once. */
4091 seen_thumb1_vfp = true;
4097 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4100 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4101 (TARGET_VFP_DOUBLE || !is_double));
4105 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4106 enum machine_mode mode, const_tree type,
4107 enum machine_mode *base_mode, int *count)
4109 enum machine_mode new_mode = VOIDmode;
4111 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4112 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4113 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4118 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4121 new_mode = (mode == DCmode ? DFmode : SFmode);
4123 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4125 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4127 if (ag_count > 0 && ag_count <= 4)
4136 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4139 *base_mode = new_mode;
4144 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4145 enum machine_mode mode, const_tree type)
4147 int count ATTRIBUTE_UNUSED;
4148 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4150 if (!use_vfp_abi (pcs_variant, false))
4152 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4157 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4160 if (!use_vfp_abi (pcum->pcs_variant, false))
4163 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4164 &pcum->aapcs_vfp_rmode,
4165 &pcum->aapcs_vfp_rcount);
4169 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4170 const_tree type ATTRIBUTE_UNUSED)
4172 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4173 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4176 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4177 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4179 pcum->aapcs_vfp_reg_alloc = mask << regno;
4180 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4183 int rcount = pcum->aapcs_vfp_rcount;
4185 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4189 /* Avoid using unsupported vector modes. */
4190 if (rmode == V2SImode)
4192 else if (rmode == V4SImode)
4199 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4200 for (i = 0; i < rcount; i++)
4202 rtx tmp = gen_rtx_REG (rmode,
4203 FIRST_VFP_REGNUM + regno + i * rshift);
4204 tmp = gen_rtx_EXPR_LIST
4206 GEN_INT (i * GET_MODE_SIZE (rmode)));
4207 XVECEXP (par, 0, i) = tmp;
4210 pcum->aapcs_reg = par;
4213 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4220 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4221 enum machine_mode mode,
4222 const_tree type ATTRIBUTE_UNUSED)
4224 if (!use_vfp_abi (pcs_variant, false))
4227 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4230 enum machine_mode ag_mode;
4235 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4240 if (ag_mode == V2SImode)
4242 else if (ag_mode == V4SImode)
4248 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4249 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4250 for (i = 0; i < count; i++)
4252 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4253 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4254 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4255 XVECEXP (par, 0, i) = tmp;
4261 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4265 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4266 enum machine_mode mode ATTRIBUTE_UNUSED,
4267 const_tree type ATTRIBUTE_UNUSED)
4269 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4270 pcum->aapcs_vfp_reg_alloc = 0;
4274 #define AAPCS_CP(X) \
4276 aapcs_ ## X ## _cum_init, \
4277 aapcs_ ## X ## _is_call_candidate, \
4278 aapcs_ ## X ## _allocate, \
4279 aapcs_ ## X ## _is_return_candidate, \
4280 aapcs_ ## X ## _allocate_return_reg, \
4281 aapcs_ ## X ## _advance \
4284 /* Table of co-processors that can be used to pass arguments in
4285 registers. Idealy no arugment should be a candidate for more than
4286 one co-processor table entry, but the table is processed in order
4287 and stops after the first match. If that entry then fails to put
4288 the argument into a co-processor register, the argument will go on
4292 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4293 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4295 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4296 BLKmode) is a candidate for this co-processor's registers; this
4297 function should ignore any position-dependent state in
4298 CUMULATIVE_ARGS and only use call-type dependent information. */
4299 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4301 /* Return true if the argument does get a co-processor register; it
4302 should set aapcs_reg to an RTX of the register allocated as is
4303 required for a return from FUNCTION_ARG. */
4304 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4306 /* Return true if a result of mode MODE (or type TYPE if MODE is
4307 BLKmode) is can be returned in this co-processor's registers. */
4308 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4310 /* Allocate and return an RTX element to hold the return type of a
4311 call, this routine must not fail and will only be called if
4312 is_return_candidate returned true with the same parameters. */
4313 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4315 /* Finish processing this argument and prepare to start processing
4317 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4318 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4326 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4331 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4332 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4339 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4341 /* We aren't passed a decl, so we can't check that a call is local.
4342 However, it isn't clear that that would be a win anyway, since it
4343 might limit some tail-calling opportunities. */
4344 enum arm_pcs pcs_variant;
4348 const_tree fndecl = NULL_TREE;
4350 if (TREE_CODE (fntype) == FUNCTION_DECL)
4353 fntype = TREE_TYPE (fntype);
4356 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4359 pcs_variant = arm_pcs_default;
4361 if (pcs_variant != ARM_PCS_AAPCS)
4365 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4366 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4375 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4378 /* We aren't passed a decl, so we can't check that a call is local.
4379 However, it isn't clear that that would be a win anyway, since it
4380 might limit some tail-calling opportunities. */
4381 enum arm_pcs pcs_variant;
4382 int unsignedp ATTRIBUTE_UNUSED;
4386 const_tree fndecl = NULL_TREE;
4388 if (TREE_CODE (fntype) == FUNCTION_DECL)
4391 fntype = TREE_TYPE (fntype);
4394 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4397 pcs_variant = arm_pcs_default;
4399 /* Promote integer types. */
4400 if (type && INTEGRAL_TYPE_P (type))
4401 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4403 if (pcs_variant != ARM_PCS_AAPCS)
4407 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4408 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4410 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4414 /* Promotes small structs returned in a register to full-word size
4415 for big-endian AAPCS. */
4416 if (type && arm_return_in_msb (type))
4418 HOST_WIDE_INT size = int_size_in_bytes (type);
4419 if (size % UNITS_PER_WORD != 0)
4421 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4422 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4426 return gen_rtx_REG (mode, R0_REGNUM);
4430 aapcs_libcall_value (enum machine_mode mode)
4432 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4435 /* Lay out a function argument using the AAPCS rules. The rule
4436 numbers referred to here are those in the AAPCS. */
4438 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4439 const_tree type, bool named)
4444 /* We only need to do this once per argument. */
4445 if (pcum->aapcs_arg_processed)
4448 pcum->aapcs_arg_processed = true;
4450 /* Special case: if named is false then we are handling an incoming
4451 anonymous argument which is on the stack. */
4455 /* Is this a potential co-processor register candidate? */
4456 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4458 int slot = aapcs_select_call_coproc (pcum, mode, type);
4459 pcum->aapcs_cprc_slot = slot;
4461 /* We don't have to apply any of the rules from part B of the
4462 preparation phase, these are handled elsewhere in the
4467 /* A Co-processor register candidate goes either in its own
4468 class of registers or on the stack. */
4469 if (!pcum->aapcs_cprc_failed[slot])
4471 /* C1.cp - Try to allocate the argument to co-processor
4473 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4476 /* C2.cp - Put the argument on the stack and note that we
4477 can't assign any more candidates in this slot. We also
4478 need to note that we have allocated stack space, so that
4479 we won't later try to split a non-cprc candidate between
4480 core registers and the stack. */
4481 pcum->aapcs_cprc_failed[slot] = true;
4482 pcum->can_split = false;
4485 /* We didn't get a register, so this argument goes on the
4487 gcc_assert (pcum->can_split == false);
4492 /* C3 - For double-word aligned arguments, round the NCRN up to the
4493 next even number. */
4494 ncrn = pcum->aapcs_ncrn;
4495 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4498 nregs = ARM_NUM_REGS2(mode, type);
4500 /* Sigh, this test should really assert that nregs > 0, but a GCC
4501 extension allows empty structs and then gives them empty size; it
4502 then allows such a structure to be passed by value. For some of
4503 the code below we have to pretend that such an argument has
4504 non-zero size so that we 'locate' it correctly either in
4505 registers or on the stack. */
4506 gcc_assert (nregs >= 0);
4508 nregs2 = nregs ? nregs : 1;
4510 /* C4 - Argument fits entirely in core registers. */
4511 if (ncrn + nregs2 <= NUM_ARG_REGS)
4513 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4514 pcum->aapcs_next_ncrn = ncrn + nregs;
4518 /* C5 - Some core registers left and there are no arguments already
4519 on the stack: split this argument between the remaining core
4520 registers and the stack. */
4521 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4523 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4524 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4525 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4529 /* C6 - NCRN is set to 4. */
4530 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4532 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4536 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4537 for a call to a function whose data type is FNTYPE.
4538 For a library call, FNTYPE is NULL. */
4540 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4542 tree fndecl ATTRIBUTE_UNUSED)
4544 /* Long call handling. */
4546 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4548 pcum->pcs_variant = arm_pcs_default;
4550 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4552 if (arm_libcall_uses_aapcs_base (libname))
4553 pcum->pcs_variant = ARM_PCS_AAPCS;
4555 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4556 pcum->aapcs_reg = NULL_RTX;
4557 pcum->aapcs_partial = 0;
4558 pcum->aapcs_arg_processed = false;
4559 pcum->aapcs_cprc_slot = -1;
4560 pcum->can_split = true;
4562 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4566 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4568 pcum->aapcs_cprc_failed[i] = false;
4569 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4577 /* On the ARM, the offset starts at 0. */
4579 pcum->iwmmxt_nregs = 0;
4580 pcum->can_split = true;
4582 /* Varargs vectors are treated the same as long long.
4583 named_count avoids having to change the way arm handles 'named' */
4584 pcum->named_count = 0;
4587 if (TARGET_REALLY_IWMMXT && fntype)
4591 for (fn_arg = TYPE_ARG_TYPES (fntype);
4593 fn_arg = TREE_CHAIN (fn_arg))
4594 pcum->named_count += 1;
4596 if (! pcum->named_count)
4597 pcum->named_count = INT_MAX;
4602 /* Return true if mode/type need doubleword alignment. */
4604 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4606 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4607 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4611 /* Determine where to put an argument to a function.
4612 Value is zero to push the argument on the stack,
4613 or a hard register in which to store the argument.
4615 MODE is the argument's machine mode.
4616 TYPE is the data type of the argument (as a tree).
4617 This is null for libcalls where that information may
4619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4620 the preceding args and about the function being called.
4621 NAMED is nonzero if this argument is a named parameter
4622 (otherwise it is an extra parameter matching an ellipsis).
4624 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4625 other arguments are passed on the stack. If (NAMED == 0) (which happens
4626 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4627 defined), say it is passed in the stack (function_prologue will
4628 indeed make it pass in the stack if necessary). */
4631 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4632 const_tree type, bool named)
4636 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4637 a call insn (op3 of a call_value insn). */
4638 if (mode == VOIDmode)
4641 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4643 aapcs_layout_arg (pcum, mode, type, named);
4644 return pcum->aapcs_reg;
4647 /* Varargs vectors are treated the same as long long.
4648 named_count avoids having to change the way arm handles 'named' */
4649 if (TARGET_IWMMXT_ABI
4650 && arm_vector_mode_supported_p (mode)
4651 && pcum->named_count > pcum->nargs + 1)
4653 if (pcum->iwmmxt_nregs <= 9)
4654 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4657 pcum->can_split = false;
4662 /* Put doubleword aligned quantities in even register pairs. */
4664 && ARM_DOUBLEWORD_ALIGN
4665 && arm_needs_doubleword_align (mode, type))
4668 /* Only allow splitting an arg between regs and memory if all preceding
4669 args were allocated to regs. For args passed by reference we only count
4670 the reference pointer. */
4671 if (pcum->can_split)
4674 nregs = ARM_NUM_REGS2 (mode, type);
4676 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4679 return gen_rtx_REG (mode, pcum->nregs);
4683 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4685 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4686 ? DOUBLEWORD_ALIGNMENT
4691 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4692 tree type, bool named)
4694 int nregs = pcum->nregs;
4696 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4698 aapcs_layout_arg (pcum, mode, type, named);
4699 return pcum->aapcs_partial;
4702 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4705 if (NUM_ARG_REGS > nregs
4706 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4708 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4713 /* Update the data in PCUM to advance over an argument
4714 of mode MODE and data type TYPE.
4715 (TYPE is null for libcalls where that information may not be available.) */
4718 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4719 const_tree type, bool named)
4721 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4723 aapcs_layout_arg (pcum, mode, type, named);
4725 if (pcum->aapcs_cprc_slot >= 0)
4727 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4729 pcum->aapcs_cprc_slot = -1;
4732 /* Generic stuff. */
4733 pcum->aapcs_arg_processed = false;
4734 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4735 pcum->aapcs_reg = NULL_RTX;
4736 pcum->aapcs_partial = 0;
4741 if (arm_vector_mode_supported_p (mode)
4742 && pcum->named_count > pcum->nargs
4743 && TARGET_IWMMXT_ABI)
4744 pcum->iwmmxt_nregs += 1;
4746 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4750 /* Variable sized types are passed by reference. This is a GCC
4751 extension to the ARM ABI. */
4754 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4755 enum machine_mode mode ATTRIBUTE_UNUSED,
4756 const_tree type, bool named ATTRIBUTE_UNUSED)
4758 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4761 /* Encode the current state of the #pragma [no_]long_calls. */
4764 OFF, /* No #pragma [no_]long_calls is in effect. */
4765 LONG, /* #pragma long_calls is in effect. */
4766 SHORT /* #pragma no_long_calls is in effect. */
4769 static arm_pragma_enum arm_pragma_long_calls = OFF;
4772 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4774 arm_pragma_long_calls = LONG;
4778 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4780 arm_pragma_long_calls = SHORT;
4784 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4786 arm_pragma_long_calls = OFF;
4789 /* Handle an attribute requiring a FUNCTION_DECL;
4790 arguments as in struct attribute_spec.handler. */
4792 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4793 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4795 if (TREE_CODE (*node) != FUNCTION_DECL)
4797 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4799 *no_add_attrs = true;
4805 /* Handle an "interrupt" or "isr" attribute;
4806 arguments as in struct attribute_spec.handler. */
4808 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4813 if (TREE_CODE (*node) != FUNCTION_DECL)
4815 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4817 *no_add_attrs = true;
4819 /* FIXME: the argument if any is checked for type attributes;
4820 should it be checked for decl ones? */
4824 if (TREE_CODE (*node) == FUNCTION_TYPE
4825 || TREE_CODE (*node) == METHOD_TYPE)
4827 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4829 warning (OPT_Wattributes, "%qE attribute ignored",
4831 *no_add_attrs = true;
4834 else if (TREE_CODE (*node) == POINTER_TYPE
4835 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4836 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4837 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4839 *node = build_variant_type_copy (*node);
4840 TREE_TYPE (*node) = build_type_attribute_variant
4842 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4843 *no_add_attrs = true;
4847 /* Possibly pass this attribute on from the type to a decl. */
4848 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4849 | (int) ATTR_FLAG_FUNCTION_NEXT
4850 | (int) ATTR_FLAG_ARRAY_NEXT))
4852 *no_add_attrs = true;
4853 return tree_cons (name, args, NULL_TREE);
4857 warning (OPT_Wattributes, "%qE attribute ignored",
4866 /* Handle a "pcs" attribute; arguments as in struct
4867 attribute_spec.handler. */
4869 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4870 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4872 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4874 warning (OPT_Wattributes, "%qE attribute ignored", name);
4875 *no_add_attrs = true;
4880 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4881 /* Handle the "notshared" attribute. This attribute is another way of
4882 requesting hidden visibility. ARM's compiler supports
4883 "__declspec(notshared)"; we support the same thing via an
4887 arm_handle_notshared_attribute (tree *node,
4888 tree name ATTRIBUTE_UNUSED,
4889 tree args ATTRIBUTE_UNUSED,
4890 int flags ATTRIBUTE_UNUSED,
4893 tree decl = TYPE_NAME (*node);
4897 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4898 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4899 *no_add_attrs = false;
4905 /* Return 0 if the attributes for two types are incompatible, 1 if they
4906 are compatible, and 2 if they are nearly compatible (which causes a
4907 warning to be generated). */
4909 arm_comp_type_attributes (const_tree type1, const_tree type2)
4913 /* Check for mismatch of non-default calling convention. */
4914 if (TREE_CODE (type1) != FUNCTION_TYPE)
4917 /* Check for mismatched call attributes. */
4918 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4919 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4920 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4921 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4923 /* Only bother to check if an attribute is defined. */
4924 if (l1 | l2 | s1 | s2)
4926 /* If one type has an attribute, the other must have the same attribute. */
4927 if ((l1 != l2) || (s1 != s2))
4930 /* Disallow mixed attributes. */
4931 if ((l1 & s2) || (l2 & s1))
4935 /* Check for mismatched ISR attribute. */
4936 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4938 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4939 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4941 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4948 /* Assigns default attributes to newly defined type. This is used to
4949 set short_call/long_call attributes for function types of
4950 functions defined inside corresponding #pragma scopes. */
4952 arm_set_default_type_attributes (tree type)
4954 /* Add __attribute__ ((long_call)) to all functions, when
4955 inside #pragma long_calls or __attribute__ ((short_call)),
4956 when inside #pragma no_long_calls. */
4957 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4959 tree type_attr_list, attr_name;
4960 type_attr_list = TYPE_ATTRIBUTES (type);
4962 if (arm_pragma_long_calls == LONG)
4963 attr_name = get_identifier ("long_call");
4964 else if (arm_pragma_long_calls == SHORT)
4965 attr_name = get_identifier ("short_call");
4969 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4970 TYPE_ATTRIBUTES (type) = type_attr_list;
4974 /* Return true if DECL is known to be linked into section SECTION. */
4977 arm_function_in_section_p (tree decl, section *section)
4979 /* We can only be certain about functions defined in the same
4980 compilation unit. */
4981 if (!TREE_STATIC (decl))
4984 /* Make sure that SYMBOL always binds to the definition in this
4985 compilation unit. */
4986 if (!targetm.binds_local_p (decl))
4989 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4990 if (!DECL_SECTION_NAME (decl))
4992 /* Make sure that we will not create a unique section for DECL. */
4993 if (flag_function_sections || DECL_ONE_ONLY (decl))
4997 return function_section (decl) == section;
5000 /* Return nonzero if a 32-bit "long_call" should be generated for
5001 a call from the current function to DECL. We generate a long_call
5004 a. has an __attribute__((long call))
5005 or b. is within the scope of a #pragma long_calls
5006 or c. the -mlong-calls command line switch has been specified
5008 However we do not generate a long call if the function:
5010 d. has an __attribute__ ((short_call))
5011 or e. is inside the scope of a #pragma no_long_calls
5012 or f. is defined in the same section as the current function. */
5015 arm_is_long_call_p (tree decl)
5020 return TARGET_LONG_CALLS;
5022 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5023 if (lookup_attribute ("short_call", attrs))
5026 /* For "f", be conservative, and only cater for cases in which the
5027 whole of the current function is placed in the same section. */
5028 if (!flag_reorder_blocks_and_partition
5029 && TREE_CODE (decl) == FUNCTION_DECL
5030 && arm_function_in_section_p (decl, current_function_section ()))
5033 if (lookup_attribute ("long_call", attrs))
5036 return TARGET_LONG_CALLS;
5039 /* Return nonzero if it is ok to make a tail-call to DECL. */
5041 arm_function_ok_for_sibcall (tree decl, tree exp)
5043 unsigned long func_type;
5045 if (cfun->machine->sibcall_blocked)
5048 /* Never tailcall something for which we have no decl, or if we
5049 are generating code for Thumb-1. */
5050 if (decl == NULL || TARGET_THUMB1)
5053 /* The PIC register is live on entry to VxWorks PLT entries, so we
5054 must make the call before restoring the PIC register. */
5055 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5058 /* Cannot tail-call to long calls, since these are out of range of
5059 a branch instruction. */
5060 if (arm_is_long_call_p (decl))
5063 /* If we are interworking and the function is not declared static
5064 then we can't tail-call it unless we know that it exists in this
5065 compilation unit (since it might be a Thumb routine). */
5066 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5069 func_type = arm_current_func_type ();
5070 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5071 if (IS_INTERRUPT (func_type))
5074 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5076 /* Check that the return value locations are the same. For
5077 example that we aren't returning a value from the sibling in
5078 a VFP register but then need to transfer it to a core
5082 a = arm_function_value (TREE_TYPE (exp), decl, false);
5083 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5085 if (!rtx_equal_p (a, b))
5089 /* Never tailcall if function may be called with a misaligned SP. */
5090 if (IS_STACKALIGN (func_type))
5093 /* Everything else is ok. */
5098 /* Addressing mode support functions. */
5100 /* Return nonzero if X is a legitimate immediate operand when compiling
5101 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5103 legitimate_pic_operand_p (rtx x)
5105 if (GET_CODE (x) == SYMBOL_REF
5106 || (GET_CODE (x) == CONST
5107 && GET_CODE (XEXP (x, 0)) == PLUS
5108 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5114 /* Record that the current function needs a PIC register. Initialize
5115 cfun->machine->pic_reg if we have not already done so. */
5118 require_pic_register (void)
5120 /* A lot of the logic here is made obscure by the fact that this
5121 routine gets called as part of the rtx cost estimation process.
5122 We don't want those calls to affect any assumptions about the real
5123 function; and further, we can't call entry_of_function() until we
5124 start the real expansion process. */
5125 if (!crtl->uses_pic_offset_table)
5127 gcc_assert (can_create_pseudo_p ());
5128 if (arm_pic_register != INVALID_REGNUM)
5130 if (!cfun->machine->pic_reg)
5131 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5133 /* Play games to avoid marking the function as needing pic
5134 if we are being called as part of the cost-estimation
5136 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5137 crtl->uses_pic_offset_table = 1;
5143 if (!cfun->machine->pic_reg)
5144 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5146 /* Play games to avoid marking the function as needing pic
5147 if we are being called as part of the cost-estimation
5149 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5151 crtl->uses_pic_offset_table = 1;
5154 arm_load_pic_register (0UL);
5159 for (insn = seq; insn; insn = NEXT_INSN (insn))
5161 INSN_LOCATOR (insn) = prologue_locator;
5163 /* We can be called during expansion of PHI nodes, where
5164 we can't yet emit instructions directly in the final
5165 insn stream. Queue the insns on the entry edge, they will
5166 be committed after everything else is expanded. */
5167 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5174 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5176 if (GET_CODE (orig) == SYMBOL_REF
5177 || GET_CODE (orig) == LABEL_REF)
5183 gcc_assert (can_create_pseudo_p ());
5184 reg = gen_reg_rtx (Pmode);
5187 /* VxWorks does not impose a fixed gap between segments; the run-time
5188 gap can be different from the object-file gap. We therefore can't
5189 use GOTOFF unless we are absolutely sure that the symbol is in the
5190 same segment as the GOT. Unfortunately, the flexibility of linker
5191 scripts means that we can't be sure of that in general, so assume
5192 that GOTOFF is never valid on VxWorks. */
5193 if ((GET_CODE (orig) == LABEL_REF
5194 || (GET_CODE (orig) == SYMBOL_REF &&
5195 SYMBOL_REF_LOCAL_P (orig)))
5197 && !TARGET_VXWORKS_RTP)
5198 insn = arm_pic_static_addr (orig, reg);
5204 /* If this function doesn't have a pic register, create one now. */
5205 require_pic_register ();
5207 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5209 /* Make the MEM as close to a constant as possible. */
5210 mem = SET_SRC (pat);
5211 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5212 MEM_READONLY_P (mem) = 1;
5213 MEM_NOTRAP_P (mem) = 1;
5215 insn = emit_insn (pat);
5218 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5220 set_unique_reg_note (insn, REG_EQUAL, orig);
5224 else if (GET_CODE (orig) == CONST)
5228 if (GET_CODE (XEXP (orig, 0)) == PLUS
5229 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5232 /* Handle the case where we have: const (UNSPEC_TLS). */
5233 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5234 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5237 /* Handle the case where we have:
5238 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5240 if (GET_CODE (XEXP (orig, 0)) == PLUS
5241 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5242 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5244 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5250 gcc_assert (can_create_pseudo_p ());
5251 reg = gen_reg_rtx (Pmode);
5254 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5256 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5257 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5258 base == reg ? 0 : reg);
5260 if (GET_CODE (offset) == CONST_INT)
5262 /* The base register doesn't really matter, we only want to
5263 test the index for the appropriate mode. */
5264 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5266 gcc_assert (can_create_pseudo_p ());
5267 offset = force_reg (Pmode, offset);
5270 if (GET_CODE (offset) == CONST_INT)
5271 return plus_constant (base, INTVAL (offset));
5274 if (GET_MODE_SIZE (mode) > 4
5275 && (GET_MODE_CLASS (mode) == MODE_INT
5276 || TARGET_SOFT_FLOAT))
5278 emit_insn (gen_addsi3 (reg, base, offset));
5282 return gen_rtx_PLUS (Pmode, base, offset);
5289 /* Find a spare register to use during the prolog of a function. */
5292 thumb_find_work_register (unsigned long pushed_regs_mask)
5296 /* Check the argument registers first as these are call-used. The
5297 register allocation order means that sometimes r3 might be used
5298 but earlier argument registers might not, so check them all. */
5299 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5300 if (!df_regs_ever_live_p (reg))
5303 /* Before going on to check the call-saved registers we can try a couple
5304 more ways of deducing that r3 is available. The first is when we are
5305 pushing anonymous arguments onto the stack and we have less than 4
5306 registers worth of fixed arguments(*). In this case r3 will be part of
5307 the variable argument list and so we can be sure that it will be
5308 pushed right at the start of the function. Hence it will be available
5309 for the rest of the prologue.
5310 (*): ie crtl->args.pretend_args_size is greater than 0. */
5311 if (cfun->machine->uses_anonymous_args
5312 && crtl->args.pretend_args_size > 0)
5313 return LAST_ARG_REGNUM;
5315 /* The other case is when we have fixed arguments but less than 4 registers
5316 worth. In this case r3 might be used in the body of the function, but
5317 it is not being used to convey an argument into the function. In theory
5318 we could just check crtl->args.size to see how many bytes are
5319 being passed in argument registers, but it seems that it is unreliable.
5320 Sometimes it will have the value 0 when in fact arguments are being
5321 passed. (See testcase execute/20021111-1.c for an example). So we also
5322 check the args_info.nregs field as well. The problem with this field is
5323 that it makes no allowances for arguments that are passed to the
5324 function but which are not used. Hence we could miss an opportunity
5325 when a function has an unused argument in r3. But it is better to be
5326 safe than to be sorry. */
5327 if (! cfun->machine->uses_anonymous_args
5328 && crtl->args.size >= 0
5329 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5330 && crtl->args.info.nregs < 4)
5331 return LAST_ARG_REGNUM;
5333 /* Otherwise look for a call-saved register that is going to be pushed. */
5334 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5335 if (pushed_regs_mask & (1 << reg))
5340 /* Thumb-2 can use high regs. */
5341 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5342 if (pushed_regs_mask & (1 << reg))
5345 /* Something went wrong - thumb_compute_save_reg_mask()
5346 should have arranged for a suitable register to be pushed. */
5350 static GTY(()) int pic_labelno;
5352 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5356 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5358 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5360 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5363 gcc_assert (flag_pic);
5365 pic_reg = cfun->machine->pic_reg;
5366 if (TARGET_VXWORKS_RTP)
5368 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5369 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5370 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5372 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5374 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5375 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5379 /* We use an UNSPEC rather than a LABEL_REF because this label
5380 never appears in the code stream. */
5382 labelno = GEN_INT (pic_labelno++);
5383 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5384 l1 = gen_rtx_CONST (VOIDmode, l1);
5386 /* On the ARM the PC register contains 'dot + 8' at the time of the
5387 addition, on the Thumb it is 'dot + 4'. */
5388 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5389 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5391 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5395 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5397 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5399 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5401 else /* TARGET_THUMB1 */
5403 if (arm_pic_register != INVALID_REGNUM
5404 && REGNO (pic_reg) > LAST_LO_REGNUM)
5406 /* We will have pushed the pic register, so we should always be
5407 able to find a work register. */
5408 pic_tmp = gen_rtx_REG (SImode,
5409 thumb_find_work_register (saved_regs));
5410 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5411 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5414 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5415 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5419 /* Need to emit this whether or not we obey regdecls,
5420 since setjmp/longjmp can cause life info to screw up. */
5424 /* Generate code to load the address of a static var when flag_pic is set. */
5426 arm_pic_static_addr (rtx orig, rtx reg)
5428 rtx l1, labelno, offset_rtx, insn;
5430 gcc_assert (flag_pic);
5432 /* We use an UNSPEC rather than a LABEL_REF because this label
5433 never appears in the code stream. */
5434 labelno = GEN_INT (pic_labelno++);
5435 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5436 l1 = gen_rtx_CONST (VOIDmode, l1);
5438 /* On the ARM the PC register contains 'dot + 8' at the time of the
5439 addition, on the Thumb it is 'dot + 4'. */
5440 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5441 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5442 UNSPEC_SYMBOL_OFFSET);
5443 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5447 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5449 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5451 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5453 else /* TARGET_THUMB1 */
5455 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5456 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5462 /* Return nonzero if X is valid as an ARM state addressing register. */
5464 arm_address_register_rtx_p (rtx x, int strict_p)
5468 if (GET_CODE (x) != REG)
5474 return ARM_REGNO_OK_FOR_BASE_P (regno);
5476 return (regno <= LAST_ARM_REGNUM
5477 || regno >= FIRST_PSEUDO_REGISTER
5478 || regno == FRAME_POINTER_REGNUM
5479 || regno == ARG_POINTER_REGNUM);
5482 /* Return TRUE if this rtx is the difference of a symbol and a label,
5483 and will reduce to a PC-relative relocation in the object file.
5484 Expressions like this can be left alone when generating PIC, rather
5485 than forced through the GOT. */
5487 pcrel_constant_p (rtx x)
5489 if (GET_CODE (x) == MINUS)
5490 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5495 /* Return true if X will surely end up in an index register after next
5498 will_be_in_index_register (const_rtx x)
5500 /* arm.md: calculate_pic_address will split this into a register. */
5501 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5504 /* Return nonzero if X is a valid ARM state address operand. */
5506 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5510 enum rtx_code code = GET_CODE (x);
5512 if (arm_address_register_rtx_p (x, strict_p))
5515 use_ldrd = (TARGET_LDRD
5517 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5519 if (code == POST_INC || code == PRE_DEC
5520 || ((code == PRE_INC || code == POST_DEC)
5521 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5522 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5524 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5525 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5526 && GET_CODE (XEXP (x, 1)) == PLUS
5527 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5529 rtx addend = XEXP (XEXP (x, 1), 1);
5531 /* Don't allow ldrd post increment by register because it's hard
5532 to fixup invalid register choices. */
5534 && GET_CODE (x) == POST_MODIFY
5535 && GET_CODE (addend) == REG)
5538 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5539 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5542 /* After reload constants split into minipools will have addresses
5543 from a LABEL_REF. */
5544 else if (reload_completed
5545 && (code == LABEL_REF
5547 && GET_CODE (XEXP (x, 0)) == PLUS
5548 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5549 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5552 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5555 else if (code == PLUS)
5557 rtx xop0 = XEXP (x, 0);
5558 rtx xop1 = XEXP (x, 1);
5560 return ((arm_address_register_rtx_p (xop0, strict_p)
5561 && ((GET_CODE(xop1) == CONST_INT
5562 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5563 || (!strict_p && will_be_in_index_register (xop1))))
5564 || (arm_address_register_rtx_p (xop1, strict_p)
5565 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5569 /* Reload currently can't handle MINUS, so disable this for now */
5570 else if (GET_CODE (x) == MINUS)
5572 rtx xop0 = XEXP (x, 0);
5573 rtx xop1 = XEXP (x, 1);
5575 return (arm_address_register_rtx_p (xop0, strict_p)
5576 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5580 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5581 && code == SYMBOL_REF
5582 && CONSTANT_POOL_ADDRESS_P (x)
5584 && symbol_mentioned_p (get_pool_constant (x))
5585 && ! pcrel_constant_p (get_pool_constant (x))))
5591 /* Return nonzero if X is a valid Thumb-2 address operand. */
5593 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5596 enum rtx_code code = GET_CODE (x);
5598 if (arm_address_register_rtx_p (x, strict_p))
5601 use_ldrd = (TARGET_LDRD
5603 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5605 if (code == POST_INC || code == PRE_DEC
5606 || ((code == PRE_INC || code == POST_DEC)
5607 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5608 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5610 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5611 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5612 && GET_CODE (XEXP (x, 1)) == PLUS
5613 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5615 /* Thumb-2 only has autoincrement by constant. */
5616 rtx addend = XEXP (XEXP (x, 1), 1);
5617 HOST_WIDE_INT offset;
5619 if (GET_CODE (addend) != CONST_INT)
5622 offset = INTVAL(addend);
5623 if (GET_MODE_SIZE (mode) <= 4)
5624 return (offset > -256 && offset < 256);
5626 return (use_ldrd && offset > -1024 && offset < 1024
5627 && (offset & 3) == 0);
5630 /* After reload constants split into minipools will have addresses
5631 from a LABEL_REF. */
5632 else if (reload_completed
5633 && (code == LABEL_REF
5635 && GET_CODE (XEXP (x, 0)) == PLUS
5636 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5637 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5640 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5643 else if (code == PLUS)
5645 rtx xop0 = XEXP (x, 0);
5646 rtx xop1 = XEXP (x, 1);
5648 return ((arm_address_register_rtx_p (xop0, strict_p)
5649 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5650 || (!strict_p && will_be_in_index_register (xop1))))
5651 || (arm_address_register_rtx_p (xop1, strict_p)
5652 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5655 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5656 && code == SYMBOL_REF
5657 && CONSTANT_POOL_ADDRESS_P (x)
5659 && symbol_mentioned_p (get_pool_constant (x))
5660 && ! pcrel_constant_p (get_pool_constant (x))))
5666 /* Return nonzero if INDEX is valid for an address index operand in
5669 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5672 HOST_WIDE_INT range;
5673 enum rtx_code code = GET_CODE (index);
5675 /* Standard coprocessor addressing modes. */
5676 if (TARGET_HARD_FLOAT
5677 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5678 && (mode == SFmode || mode == DFmode
5679 || (TARGET_MAVERICK && mode == DImode)))
5680 return (code == CONST_INT && INTVAL (index) < 1024
5681 && INTVAL (index) > -1024
5682 && (INTVAL (index) & 3) == 0);
5684 /* For quad modes, we restrict the constant offset to be slightly less
5685 than what the instruction format permits. We do this because for
5686 quad mode moves, we will actually decompose them into two separate
5687 double-mode reads or writes. INDEX must therefore be a valid
5688 (double-mode) offset and so should INDEX+8. */
5689 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5690 return (code == CONST_INT
5691 && INTVAL (index) < 1016
5692 && INTVAL (index) > -1024
5693 && (INTVAL (index) & 3) == 0);
5695 /* We have no such constraint on double mode offsets, so we permit the
5696 full range of the instruction format. */
5697 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5698 return (code == CONST_INT
5699 && INTVAL (index) < 1024
5700 && INTVAL (index) > -1024
5701 && (INTVAL (index) & 3) == 0);
5703 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5704 return (code == CONST_INT
5705 && INTVAL (index) < 1024
5706 && INTVAL (index) > -1024
5707 && (INTVAL (index) & 3) == 0);
5709 if (arm_address_register_rtx_p (index, strict_p)
5710 && (GET_MODE_SIZE (mode) <= 4))
5713 if (mode == DImode || mode == DFmode)
5715 if (code == CONST_INT)
5717 HOST_WIDE_INT val = INTVAL (index);
5720 return val > -256 && val < 256;
5722 return val > -4096 && val < 4092;
5725 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5728 if (GET_MODE_SIZE (mode) <= 4
5732 || (mode == QImode && outer == SIGN_EXTEND))))
5736 rtx xiop0 = XEXP (index, 0);
5737 rtx xiop1 = XEXP (index, 1);
5739 return ((arm_address_register_rtx_p (xiop0, strict_p)
5740 && power_of_two_operand (xiop1, SImode))
5741 || (arm_address_register_rtx_p (xiop1, strict_p)
5742 && power_of_two_operand (xiop0, SImode)));
5744 else if (code == LSHIFTRT || code == ASHIFTRT
5745 || code == ASHIFT || code == ROTATERT)
5747 rtx op = XEXP (index, 1);
5749 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5750 && GET_CODE (op) == CONST_INT
5752 && INTVAL (op) <= 31);
5756 /* For ARM v4 we may be doing a sign-extend operation during the
5762 || (outer == SIGN_EXTEND && mode == QImode))
5768 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5770 return (code == CONST_INT
5771 && INTVAL (index) < range
5772 && INTVAL (index) > -range);
5775 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5776 index operand. i.e. 1, 2, 4 or 8. */
5778 thumb2_index_mul_operand (rtx op)
5782 if (GET_CODE(op) != CONST_INT)
5786 return (val == 1 || val == 2 || val == 4 || val == 8);
5789 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5791 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5793 enum rtx_code code = GET_CODE (index);
5795 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5796 /* Standard coprocessor addressing modes. */
5797 if (TARGET_HARD_FLOAT
5798 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5799 && (mode == SFmode || mode == DFmode
5800 || (TARGET_MAVERICK && mode == DImode)))
5801 return (code == CONST_INT && INTVAL (index) < 1024
5802 /* Thumb-2 allows only > -256 index range for it's core register
5803 load/stores. Since we allow SF/DF in core registers, we have
5804 to use the intersection between -256~4096 (core) and -1024~1024
5806 && INTVAL (index) > -256
5807 && (INTVAL (index) & 3) == 0);
5809 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5811 /* For DImode assume values will usually live in core regs
5812 and only allow LDRD addressing modes. */
5813 if (!TARGET_LDRD || mode != DImode)
5814 return (code == CONST_INT
5815 && INTVAL (index) < 1024
5816 && INTVAL (index) > -1024
5817 && (INTVAL (index) & 3) == 0);
5820 /* For quad modes, we restrict the constant offset to be slightly less
5821 than what the instruction format permits. We do this because for
5822 quad mode moves, we will actually decompose them into two separate
5823 double-mode reads or writes. INDEX must therefore be a valid
5824 (double-mode) offset and so should INDEX+8. */
5825 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5826 return (code == CONST_INT
5827 && INTVAL (index) < 1016
5828 && INTVAL (index) > -1024
5829 && (INTVAL (index) & 3) == 0);
5831 /* We have no such constraint on double mode offsets, so we permit the
5832 full range of the instruction format. */
5833 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5834 return (code == CONST_INT
5835 && INTVAL (index) < 1024
5836 && INTVAL (index) > -1024
5837 && (INTVAL (index) & 3) == 0);
5839 if (arm_address_register_rtx_p (index, strict_p)
5840 && (GET_MODE_SIZE (mode) <= 4))
5843 if (mode == DImode || mode == DFmode)
5845 if (code == CONST_INT)
5847 HOST_WIDE_INT val = INTVAL (index);
5848 /* ??? Can we assume ldrd for thumb2? */
5849 /* Thumb-2 ldrd only has reg+const addressing modes. */
5850 /* ldrd supports offsets of +-1020.
5851 However the ldr fallback does not. */
5852 return val > -256 && val < 256 && (val & 3) == 0;
5860 rtx xiop0 = XEXP (index, 0);
5861 rtx xiop1 = XEXP (index, 1);
5863 return ((arm_address_register_rtx_p (xiop0, strict_p)
5864 && thumb2_index_mul_operand (xiop1))
5865 || (arm_address_register_rtx_p (xiop1, strict_p)
5866 && thumb2_index_mul_operand (xiop0)));
5868 else if (code == ASHIFT)
5870 rtx op = XEXP (index, 1);
5872 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5873 && GET_CODE (op) == CONST_INT
5875 && INTVAL (op) <= 3);
5878 return (code == CONST_INT
5879 && INTVAL (index) < 4096
5880 && INTVAL (index) > -256);
5883 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5885 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5889 if (GET_CODE (x) != REG)
5895 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5897 return (regno <= LAST_LO_REGNUM
5898 || regno > LAST_VIRTUAL_REGISTER
5899 || regno == FRAME_POINTER_REGNUM
5900 || (GET_MODE_SIZE (mode) >= 4
5901 && (regno == STACK_POINTER_REGNUM
5902 || regno >= FIRST_PSEUDO_REGISTER
5903 || x == hard_frame_pointer_rtx
5904 || x == arg_pointer_rtx)));
5907 /* Return nonzero if x is a legitimate index register. This is the case
5908 for any base register that can access a QImode object. */
5910 thumb1_index_register_rtx_p (rtx x, int strict_p)
5912 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5915 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5917 The AP may be eliminated to either the SP or the FP, so we use the
5918 least common denominator, e.g. SImode, and offsets from 0 to 64.
5920 ??? Verify whether the above is the right approach.
5922 ??? Also, the FP may be eliminated to the SP, so perhaps that
5923 needs special handling also.
5925 ??? Look at how the mips16 port solves this problem. It probably uses
5926 better ways to solve some of these problems.
5928 Although it is not incorrect, we don't accept QImode and HImode
5929 addresses based on the frame pointer or arg pointer until the
5930 reload pass starts. This is so that eliminating such addresses
5931 into stack based ones won't produce impossible code. */
5933 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5935 /* ??? Not clear if this is right. Experiment. */
5936 if (GET_MODE_SIZE (mode) < 4
5937 && !(reload_in_progress || reload_completed)
5938 && (reg_mentioned_p (frame_pointer_rtx, x)
5939 || reg_mentioned_p (arg_pointer_rtx, x)
5940 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5941 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5942 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5943 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5946 /* Accept any base register. SP only in SImode or larger. */
5947 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5950 /* This is PC relative data before arm_reorg runs. */
5951 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5952 && GET_CODE (x) == SYMBOL_REF
5953 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5956 /* This is PC relative data after arm_reorg runs. */
5957 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5959 && (GET_CODE (x) == LABEL_REF
5960 || (GET_CODE (x) == CONST
5961 && GET_CODE (XEXP (x, 0)) == PLUS
5962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5963 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5966 /* Post-inc indexing only supported for SImode and larger. */
5967 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5968 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5971 else if (GET_CODE (x) == PLUS)
5973 /* REG+REG address can be any two index registers. */
5974 /* We disallow FRAME+REG addressing since we know that FRAME
5975 will be replaced with STACK, and SP relative addressing only
5976 permits SP+OFFSET. */
5977 if (GET_MODE_SIZE (mode) <= 4
5978 && XEXP (x, 0) != frame_pointer_rtx
5979 && XEXP (x, 1) != frame_pointer_rtx
5980 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5981 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5982 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5985 /* REG+const has 5-7 bit offset for non-SP registers. */
5986 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5987 || XEXP (x, 0) == arg_pointer_rtx)
5988 && GET_CODE (XEXP (x, 1)) == CONST_INT
5989 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5992 /* REG+const has 10-bit offset for SP, but only SImode and
5993 larger is supported. */
5994 /* ??? Should probably check for DI/DFmode overflow here
5995 just like GO_IF_LEGITIMATE_OFFSET does. */
5996 else if (GET_CODE (XEXP (x, 0)) == REG
5997 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5998 && GET_MODE_SIZE (mode) >= 4
5999 && GET_CODE (XEXP (x, 1)) == CONST_INT
6000 && INTVAL (XEXP (x, 1)) >= 0
6001 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6002 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6005 else if (GET_CODE (XEXP (x, 0)) == REG
6006 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6007 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6008 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6009 && REGNO (XEXP (x, 0))
6010 <= LAST_VIRTUAL_POINTER_REGISTER))
6011 && GET_MODE_SIZE (mode) >= 4
6012 && GET_CODE (XEXP (x, 1)) == CONST_INT
6013 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6017 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6018 && GET_MODE_SIZE (mode) == 4
6019 && GET_CODE (x) == SYMBOL_REF
6020 && CONSTANT_POOL_ADDRESS_P (x)
6022 && symbol_mentioned_p (get_pool_constant (x))
6023 && ! pcrel_constant_p (get_pool_constant (x))))
6029 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6030 instruction of mode MODE. */
6032 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6034 switch (GET_MODE_SIZE (mode))
6037 return val >= 0 && val < 32;
6040 return val >= 0 && val < 64 && (val & 1) == 0;
6044 && (val + GET_MODE_SIZE (mode)) <= 128
6050 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6053 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6054 else if (TARGET_THUMB2)
6055 return thumb2_legitimate_address_p (mode, x, strict_p);
6056 else /* if (TARGET_THUMB1) */
6057 return thumb1_legitimate_address_p (mode, x, strict_p);
6060 /* Build the SYMBOL_REF for __tls_get_addr. */
6062 static GTY(()) rtx tls_get_addr_libfunc;
6065 get_tls_get_addr (void)
6067 if (!tls_get_addr_libfunc)
6068 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6069 return tls_get_addr_libfunc;
6073 arm_load_tp (rtx target)
6076 target = gen_reg_rtx (SImode);
6080 /* Can return in any reg. */
6081 emit_insn (gen_load_tp_hard (target));
6085 /* Always returned in r0. Immediately copy the result into a pseudo,
6086 otherwise other uses of r0 (e.g. setting up function arguments) may
6087 clobber the value. */
6091 emit_insn (gen_load_tp_soft ());
6093 tmp = gen_rtx_REG (SImode, 0);
6094 emit_move_insn (target, tmp);
6100 load_tls_operand (rtx x, rtx reg)
6104 if (reg == NULL_RTX)
6105 reg = gen_reg_rtx (SImode);
6107 tmp = gen_rtx_CONST (SImode, x);
6109 emit_move_insn (reg, tmp);
6115 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6117 rtx insns, label, labelno, sum;
6121 labelno = GEN_INT (pic_labelno++);
6122 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6123 label = gen_rtx_CONST (VOIDmode, label);
6125 sum = gen_rtx_UNSPEC (Pmode,
6126 gen_rtvec (4, x, GEN_INT (reloc), label,
6127 GEN_INT (TARGET_ARM ? 8 : 4)),
6129 reg = load_tls_operand (sum, reg);
6132 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6133 else if (TARGET_THUMB2)
6134 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6135 else /* TARGET_THUMB1 */
6136 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6138 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
6139 Pmode, 1, reg, Pmode);
6141 insns = get_insns ();
6148 legitimize_tls_address (rtx x, rtx reg)
6150 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6151 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6155 case TLS_MODEL_GLOBAL_DYNAMIC:
6156 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6157 dest = gen_reg_rtx (Pmode);
6158 emit_libcall_block (insns, dest, ret, x);
6161 case TLS_MODEL_LOCAL_DYNAMIC:
6162 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6164 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6165 share the LDM result with other LD model accesses. */
6166 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6168 dest = gen_reg_rtx (Pmode);
6169 emit_libcall_block (insns, dest, ret, eqv);
6171 /* Load the addend. */
6172 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
6174 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6175 return gen_rtx_PLUS (Pmode, dest, addend);
6177 case TLS_MODEL_INITIAL_EXEC:
6178 labelno = GEN_INT (pic_labelno++);
6179 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6180 label = gen_rtx_CONST (VOIDmode, label);
6181 sum = gen_rtx_UNSPEC (Pmode,
6182 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6183 GEN_INT (TARGET_ARM ? 8 : 4)),
6185 reg = load_tls_operand (sum, reg);
6188 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6189 else if (TARGET_THUMB2)
6190 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6193 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6194 emit_move_insn (reg, gen_const_mem (SImode, reg));
6197 tp = arm_load_tp (NULL_RTX);
6199 return gen_rtx_PLUS (Pmode, tp, reg);
6201 case TLS_MODEL_LOCAL_EXEC:
6202 tp = arm_load_tp (NULL_RTX);
6204 reg = gen_rtx_UNSPEC (Pmode,
6205 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6207 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6209 return gen_rtx_PLUS (Pmode, tp, reg);
6216 /* Try machine-dependent ways of modifying an illegitimate address
6217 to be legitimate. If we find one, return the new, valid address. */
6219 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6223 /* TODO: legitimize_address for Thumb2. */
6226 return thumb_legitimize_address (x, orig_x, mode);
6229 if (arm_tls_symbol_p (x))
6230 return legitimize_tls_address (x, NULL_RTX);
6232 if (GET_CODE (x) == PLUS)
6234 rtx xop0 = XEXP (x, 0);
6235 rtx xop1 = XEXP (x, 1);
6237 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6238 xop0 = force_reg (SImode, xop0);
6240 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6241 xop1 = force_reg (SImode, xop1);
6243 if (ARM_BASE_REGISTER_RTX_P (xop0)
6244 && GET_CODE (xop1) == CONST_INT)
6246 HOST_WIDE_INT n, low_n;
6250 /* VFP addressing modes actually allow greater offsets, but for
6251 now we just stick with the lowest common denominator. */
6253 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6265 low_n = ((mode) == TImode ? 0
6266 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6270 base_reg = gen_reg_rtx (SImode);
6271 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6272 emit_move_insn (base_reg, val);
6273 x = plus_constant (base_reg, low_n);
6275 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6276 x = gen_rtx_PLUS (SImode, xop0, xop1);
6279 /* XXX We don't allow MINUS any more -- see comment in
6280 arm_legitimate_address_outer_p (). */
6281 else if (GET_CODE (x) == MINUS)
6283 rtx xop0 = XEXP (x, 0);
6284 rtx xop1 = XEXP (x, 1);
6286 if (CONSTANT_P (xop0))
6287 xop0 = force_reg (SImode, xop0);
6289 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6290 xop1 = force_reg (SImode, xop1);
6292 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6293 x = gen_rtx_MINUS (SImode, xop0, xop1);
6296 /* Make sure to take full advantage of the pre-indexed addressing mode
6297 with absolute addresses which often allows for the base register to
6298 be factorized for multiple adjacent memory references, and it might
6299 even allows for the mini pool to be avoided entirely. */
6300 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6303 HOST_WIDE_INT mask, base, index;
6306 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6307 use a 8-bit index. So let's use a 12-bit index for SImode only and
6308 hope that arm_gen_constant will enable ldrb to use more bits. */
6309 bits = (mode == SImode) ? 12 : 8;
6310 mask = (1 << bits) - 1;
6311 base = INTVAL (x) & ~mask;
6312 index = INTVAL (x) & mask;
6313 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6315 /* It'll most probably be more efficient to generate the base
6316 with more bits set and use a negative index instead. */
6320 base_reg = force_reg (SImode, GEN_INT (base));
6321 x = plus_constant (base_reg, index);
6326 /* We need to find and carefully transform any SYMBOL and LABEL
6327 references; so go back to the original address expression. */
6328 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6330 if (new_x != orig_x)
6338 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6339 to be legitimate. If we find one, return the new, valid address. */
6341 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6343 if (arm_tls_symbol_p (x))
6344 return legitimize_tls_address (x, NULL_RTX);
6346 if (GET_CODE (x) == PLUS
6347 && GET_CODE (XEXP (x, 1)) == CONST_INT
6348 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6349 || INTVAL (XEXP (x, 1)) < 0))
6351 rtx xop0 = XEXP (x, 0);
6352 rtx xop1 = XEXP (x, 1);
6353 HOST_WIDE_INT offset = INTVAL (xop1);
6355 /* Try and fold the offset into a biasing of the base register and
6356 then offsetting that. Don't do this when optimizing for space
6357 since it can cause too many CSEs. */
6358 if (optimize_size && offset >= 0
6359 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6361 HOST_WIDE_INT delta;
6364 delta = offset - (256 - GET_MODE_SIZE (mode));
6365 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6366 delta = 31 * GET_MODE_SIZE (mode);
6368 delta = offset & (~31 * GET_MODE_SIZE (mode));
6370 xop0 = force_operand (plus_constant (xop0, offset - delta),
6372 x = plus_constant (xop0, delta);
6374 else if (offset < 0 && offset > -256)
6375 /* Small negative offsets are best done with a subtract before the
6376 dereference, forcing these into a register normally takes two
6378 x = force_operand (x, NULL_RTX);
6381 /* For the remaining cases, force the constant into a register. */
6382 xop1 = force_reg (SImode, xop1);
6383 x = gen_rtx_PLUS (SImode, xop0, xop1);
6386 else if (GET_CODE (x) == PLUS
6387 && s_register_operand (XEXP (x, 1), SImode)
6388 && !s_register_operand (XEXP (x, 0), SImode))
6390 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6392 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6397 /* We need to find and carefully transform any SYMBOL and LABEL
6398 references; so go back to the original address expression. */
6399 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6401 if (new_x != orig_x)
6409 arm_legitimize_reload_address (rtx *p,
6410 enum machine_mode mode,
6411 int opnum, int type,
6412 int ind_levels ATTRIBUTE_UNUSED)
6414 if (GET_CODE (*p) == PLUS
6415 && GET_CODE (XEXP (*p, 0)) == REG
6416 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6417 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6419 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6420 HOST_WIDE_INT low, high;
6422 if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
6423 low = ((val & 0xf) ^ 0x8) - 0x8;
6424 else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
6425 /* Need to be careful, -256 is not a valid offset. */
6426 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6427 else if (mode == SImode
6428 || (mode == SFmode && TARGET_SOFT_FLOAT)
6429 || ((mode == HImode || mode == QImode) && ! arm_arch4))
6430 /* Need to be careful, -4096 is not a valid offset. */
6431 low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
6432 else if ((mode == HImode || mode == QImode) && arm_arch4)
6433 /* Need to be careful, -256 is not a valid offset. */
6434 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6435 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6436 && TARGET_HARD_FLOAT && TARGET_FPA)
6437 /* Need to be careful, -1024 is not a valid offset. */
6438 low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
6442 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6443 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6444 - (unsigned HOST_WIDE_INT) 0x80000000);
6445 /* Check for overflow or zero */
6446 if (low == 0 || high == 0 || (high + low != val))
6449 /* Reload the high part into a base reg; leave the low part
6451 *p = gen_rtx_PLUS (GET_MODE (*p),
6452 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6455 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6456 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6457 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6465 thumb_legitimize_reload_address (rtx *x_p,
6466 enum machine_mode mode,
6467 int opnum, int type,
6468 int ind_levels ATTRIBUTE_UNUSED)
6472 if (GET_CODE (x) == PLUS
6473 && GET_MODE_SIZE (mode) < 4
6474 && REG_P (XEXP (x, 0))
6475 && XEXP (x, 0) == stack_pointer_rtx
6476 && GET_CODE (XEXP (x, 1)) == CONST_INT
6477 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6482 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6483 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6487 /* If both registers are hi-regs, then it's better to reload the
6488 entire expression rather than each register individually. That
6489 only requires one reload register rather than two. */
6490 if (GET_CODE (x) == PLUS
6491 && REG_P (XEXP (x, 0))
6492 && REG_P (XEXP (x, 1))
6493 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6494 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6499 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6500 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6507 /* Test for various thread-local symbols. */
6509 /* Return TRUE if X is a thread-local symbol. */
6512 arm_tls_symbol_p (rtx x)
6514 if (! TARGET_HAVE_TLS)
6517 if (GET_CODE (x) != SYMBOL_REF)
6520 return SYMBOL_REF_TLS_MODEL (x) != 0;
6523 /* Helper for arm_tls_referenced_p. */
6526 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6528 if (GET_CODE (*x) == SYMBOL_REF)
6529 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6531 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6532 TLS offsets, not real symbol references. */
6533 if (GET_CODE (*x) == UNSPEC
6534 && XINT (*x, 1) == UNSPEC_TLS)
6540 /* Return TRUE if X contains any TLS symbol references. */
6543 arm_tls_referenced_p (rtx x)
6545 if (! TARGET_HAVE_TLS)
6548 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6551 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6554 arm_cannot_force_const_mem (rtx x)
6558 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6560 split_const (x, &base, &offset);
6561 if (GET_CODE (base) == SYMBOL_REF
6562 && !offset_within_block_p (base, INTVAL (offset)))
6565 return arm_tls_referenced_p (x);
6568 #define REG_OR_SUBREG_REG(X) \
6569 (GET_CODE (X) == REG \
6570 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6572 #define REG_OR_SUBREG_RTX(X) \
6573 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6576 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6578 enum machine_mode mode = GET_MODE (x);
6592 return COSTS_N_INSNS (1);
6595 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6598 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6605 return COSTS_N_INSNS (2) + cycles;
6607 return COSTS_N_INSNS (1) + 16;
6610 return (COSTS_N_INSNS (1)
6611 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6612 + GET_CODE (SET_DEST (x)) == MEM));
6617 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6619 if (thumb_shiftable_const (INTVAL (x)))
6620 return COSTS_N_INSNS (2);
6621 return COSTS_N_INSNS (3);
6623 else if ((outer == PLUS || outer == COMPARE)
6624 && INTVAL (x) < 256 && INTVAL (x) > -256)
6626 else if ((outer == IOR || outer == XOR || outer == AND)
6627 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6628 return COSTS_N_INSNS (1);
6629 else if (outer == AND)
6632 /* This duplicates the tests in the andsi3 expander. */
6633 for (i = 9; i <= 31; i++)
6634 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6635 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6636 return COSTS_N_INSNS (2);
6638 else if (outer == ASHIFT || outer == ASHIFTRT
6639 || outer == LSHIFTRT)
6641 return COSTS_N_INSNS (2);
6647 return COSTS_N_INSNS (3);
6665 /* XXX another guess. */
6666 /* Memory costs quite a lot for the first word, but subsequent words
6667 load at the equivalent of a single insn each. */
6668 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6669 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6674 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6680 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6681 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6687 return total + COSTS_N_INSNS (1);
6689 /* Assume a two-shift sequence. Increase the cost slightly so
6690 we prefer actual shifts over an extend operation. */
6691 return total + 1 + COSTS_N_INSNS (2);
6699 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6701 enum machine_mode mode = GET_MODE (x);
6702 enum rtx_code subcode;
6704 enum rtx_code code = GET_CODE (x);
6710 /* Memory costs quite a lot for the first word, but subsequent words
6711 load at the equivalent of a single insn each. */
6712 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6719 if (TARGET_HARD_FLOAT && mode == SFmode)
6720 *total = COSTS_N_INSNS (2);
6721 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6722 *total = COSTS_N_INSNS (4);
6724 *total = COSTS_N_INSNS (20);
6728 if (GET_CODE (XEXP (x, 1)) == REG)
6729 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6730 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6731 *total = rtx_cost (XEXP (x, 1), code, speed);
6737 *total += COSTS_N_INSNS (4);
6742 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6743 *total += rtx_cost (XEXP (x, 0), code, speed);
6746 *total += COSTS_N_INSNS (3);
6750 *total += COSTS_N_INSNS (1);
6751 /* Increase the cost of complex shifts because they aren't any faster,
6752 and reduce dual issue opportunities. */
6753 if (arm_tune_cortex_a9
6754 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6762 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6763 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6764 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6766 *total += rtx_cost (XEXP (x, 1), code, speed);
6770 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6771 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6773 *total += rtx_cost (XEXP (x, 0), code, speed);
6780 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6782 if (TARGET_HARD_FLOAT
6784 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6786 *total = COSTS_N_INSNS (1);
6787 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6788 && arm_const_double_rtx (XEXP (x, 0)))
6790 *total += rtx_cost (XEXP (x, 1), code, speed);
6794 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6795 && arm_const_double_rtx (XEXP (x, 1)))
6797 *total += rtx_cost (XEXP (x, 0), code, speed);
6803 *total = COSTS_N_INSNS (20);
6807 *total = COSTS_N_INSNS (1);
6808 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6809 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6811 *total += rtx_cost (XEXP (x, 1), code, speed);
6815 subcode = GET_CODE (XEXP (x, 1));
6816 if (subcode == ASHIFT || subcode == ASHIFTRT
6817 || subcode == LSHIFTRT
6818 || subcode == ROTATE || subcode == ROTATERT)
6820 *total += rtx_cost (XEXP (x, 0), code, speed);
6821 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6825 /* A shift as a part of RSB costs no more than RSB itself. */
6826 if (GET_CODE (XEXP (x, 0)) == MULT
6827 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6829 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6830 *total += rtx_cost (XEXP (x, 1), code, speed);
6835 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6837 *total += rtx_cost (XEXP (x, 0), code, speed);
6838 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6842 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6843 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6845 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6846 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6847 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6848 *total += COSTS_N_INSNS (1);
6856 if (code == PLUS && arm_arch6 && mode == SImode
6857 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6858 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6860 *total = COSTS_N_INSNS (1);
6861 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6863 *total += rtx_cost (XEXP (x, 1), code, speed);
6867 /* MLA: All arguments must be registers. We filter out
6868 multiplication by a power of two, so that we fall down into
6870 if (GET_CODE (XEXP (x, 0)) == MULT
6871 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6873 /* The cost comes from the cost of the multiply. */
6877 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6879 if (TARGET_HARD_FLOAT
6881 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6883 *total = COSTS_N_INSNS (1);
6884 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6885 && arm_const_double_rtx (XEXP (x, 1)))
6887 *total += rtx_cost (XEXP (x, 0), code, speed);
6894 *total = COSTS_N_INSNS (20);
6898 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6899 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6901 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6902 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6903 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6904 *total += COSTS_N_INSNS (1);
6910 case AND: case XOR: case IOR:
6912 /* Normally the frame registers will be spilt into reg+const during
6913 reload, so it is a bad idea to combine them with other instructions,
6914 since then they might not be moved outside of loops. As a compromise
6915 we allow integration with ops that have a constant as their second
6917 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6918 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6919 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6920 *total = COSTS_N_INSNS (1);
6924 *total += COSTS_N_INSNS (2);
6925 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6926 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6928 *total += rtx_cost (XEXP (x, 0), code, speed);
6935 *total += COSTS_N_INSNS (1);
6936 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6937 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6939 *total += rtx_cost (XEXP (x, 0), code, speed);
6942 subcode = GET_CODE (XEXP (x, 0));
6943 if (subcode == ASHIFT || subcode == ASHIFTRT
6944 || subcode == LSHIFTRT
6945 || subcode == ROTATE || subcode == ROTATERT)
6947 *total += rtx_cost (XEXP (x, 1), code, speed);
6948 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6953 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6955 *total += rtx_cost (XEXP (x, 1), code, speed);
6956 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6960 if (subcode == UMIN || subcode == UMAX
6961 || subcode == SMIN || subcode == SMAX)
6963 *total = COSTS_N_INSNS (3);
6970 /* This should have been handled by the CPU specific routines. */
6974 if (arm_arch3m && mode == SImode
6975 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6976 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6977 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6978 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6979 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6980 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6982 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6985 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6989 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6991 if (TARGET_HARD_FLOAT
6993 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6995 *total = COSTS_N_INSNS (1);
6998 *total = COSTS_N_INSNS (2);
7004 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7005 if (mode == SImode && code == NOT)
7007 subcode = GET_CODE (XEXP (x, 0));
7008 if (subcode == ASHIFT || subcode == ASHIFTRT
7009 || subcode == LSHIFTRT
7010 || subcode == ROTATE || subcode == ROTATERT
7012 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7014 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7015 /* Register shifts cost an extra cycle. */
7016 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7017 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7026 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7028 *total = COSTS_N_INSNS (4);
7032 operand = XEXP (x, 0);
7034 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7035 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7036 && GET_CODE (XEXP (operand, 0)) == REG
7037 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7038 *total += COSTS_N_INSNS (1);
7039 *total += (rtx_cost (XEXP (x, 1), code, speed)
7040 + rtx_cost (XEXP (x, 2), code, speed));
7044 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7046 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7052 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7053 && mode == SImode && XEXP (x, 1) == const0_rtx)
7055 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7061 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7062 && mode == SImode && XEXP (x, 1) == const0_rtx)
7064 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7084 /* SCC insns. In the case where the comparison has already been
7085 performed, then they cost 2 instructions. Otherwise they need
7086 an additional comparison before them. */
7087 *total = COSTS_N_INSNS (2);
7088 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7095 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7101 *total += COSTS_N_INSNS (1);
7102 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7103 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7105 *total += rtx_cost (XEXP (x, 0), code, speed);
7109 subcode = GET_CODE (XEXP (x, 0));
7110 if (subcode == ASHIFT || subcode == ASHIFTRT
7111 || subcode == LSHIFTRT
7112 || subcode == ROTATE || subcode == ROTATERT)
7114 *total += rtx_cost (XEXP (x, 1), code, speed);
7115 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7120 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7122 *total += rtx_cost (XEXP (x, 1), code, speed);
7123 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7133 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7134 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7135 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7136 *total += rtx_cost (XEXP (x, 1), code, speed);
7140 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7142 if (TARGET_HARD_FLOAT
7144 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7146 *total = COSTS_N_INSNS (1);
7149 *total = COSTS_N_INSNS (20);
7152 *total = COSTS_N_INSNS (1);
7154 *total += COSTS_N_INSNS (3);
7160 if (GET_MODE_CLASS (mode) == MODE_INT)
7162 rtx op = XEXP (x, 0);
7163 enum machine_mode opmode = GET_MODE (op);
7166 *total += COSTS_N_INSNS (1);
7168 if (opmode != SImode)
7172 /* If !arm_arch4, we use one of the extendhisi2_mem
7173 or movhi_bytes patterns for HImode. For a QImode
7174 sign extension, we first zero-extend from memory
7175 and then perform a shift sequence. */
7176 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7177 *total += COSTS_N_INSNS (2);
7180 *total += COSTS_N_INSNS (1);
7182 /* We don't have the necessary insn, so we need to perform some
7184 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7185 /* An and with constant 255. */
7186 *total += COSTS_N_INSNS (1);
7188 /* A shift sequence. Increase costs slightly to avoid
7189 combining two shifts into an extend operation. */
7190 *total += COSTS_N_INSNS (2) + 1;
7196 switch (GET_MODE (XEXP (x, 0)))
7203 *total = COSTS_N_INSNS (1);
7213 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7217 if (const_ok_for_arm (INTVAL (x))
7218 || const_ok_for_arm (~INTVAL (x)))
7219 *total = COSTS_N_INSNS (1);
7221 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7222 INTVAL (x), NULL_RTX,
7229 *total = COSTS_N_INSNS (3);
7233 *total = COSTS_N_INSNS (1);
7237 *total = COSTS_N_INSNS (1);
7238 *total += rtx_cost (XEXP (x, 0), code, speed);
7242 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7243 && (mode == SFmode || !TARGET_VFP_SINGLE))
7244 *total = COSTS_N_INSNS (1);
7246 *total = COSTS_N_INSNS (4);
7250 *total = COSTS_N_INSNS (4);
7255 /* Estimates the size cost of thumb1 instructions.
7256 For now most of the code is copied from thumb1_rtx_costs. We need more
7257 fine grain tuning when we have more related test cases. */
7259 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7261 enum machine_mode mode = GET_MODE (x);
7274 return COSTS_N_INSNS (1);
7277 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7279 /* Thumb1 mul instruction can't operate on const. We must Load it
7280 into a register first. */
7281 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7282 return COSTS_N_INSNS (1) + const_size;
7284 return COSTS_N_INSNS (1);
7287 return (COSTS_N_INSNS (1)
7288 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7289 + GET_CODE (SET_DEST (x)) == MEM));
7294 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7295 return COSTS_N_INSNS (1);
7296 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7297 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7298 return COSTS_N_INSNS (2);
7299 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7300 if (thumb_shiftable_const (INTVAL (x)))
7301 return COSTS_N_INSNS (2);
7302 return COSTS_N_INSNS (3);
7304 else if ((outer == PLUS || outer == COMPARE)
7305 && INTVAL (x) < 256 && INTVAL (x) > -256)
7307 else if ((outer == IOR || outer == XOR || outer == AND)
7308 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7309 return COSTS_N_INSNS (1);
7310 else if (outer == AND)
7313 /* This duplicates the tests in the andsi3 expander. */
7314 for (i = 9; i <= 31; i++)
7315 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7316 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7317 return COSTS_N_INSNS (2);
7319 else if (outer == ASHIFT || outer == ASHIFTRT
7320 || outer == LSHIFTRT)
7322 return COSTS_N_INSNS (2);
7328 return COSTS_N_INSNS (3);
7346 /* XXX another guess. */
7347 /* Memory costs quite a lot for the first word, but subsequent words
7348 load at the equivalent of a single insn each. */
7349 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7350 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7355 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7360 /* XXX still guessing. */
7361 switch (GET_MODE (XEXP (x, 0)))
7364 return (1 + (mode == DImode ? 4 : 0)
7365 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7368 return (4 + (mode == DImode ? 4 : 0)
7369 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7372 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7383 /* RTX costs when optimizing for size. */
7385 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7388 enum machine_mode mode = GET_MODE (x);
7391 *total = thumb1_size_rtx_costs (x, code, outer_code);
7395 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7399 /* A memory access costs 1 insn if the mode is small, or the address is
7400 a single register, otherwise it costs one insn per word. */
7401 if (REG_P (XEXP (x, 0)))
7402 *total = COSTS_N_INSNS (1);
7404 && GET_CODE (XEXP (x, 0)) == PLUS
7405 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7406 /* This will be split into two instructions.
7407 See arm.md:calculate_pic_address. */
7408 *total = COSTS_N_INSNS (2);
7410 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7417 /* Needs a libcall, so it costs about this. */
7418 *total = COSTS_N_INSNS (2);
7422 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7424 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7432 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7434 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7437 else if (mode == SImode)
7439 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7440 /* Slightly disparage register shifts, but not by much. */
7441 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7442 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7446 /* Needs a libcall. */
7447 *total = COSTS_N_INSNS (2);
7451 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7452 && (mode == SFmode || !TARGET_VFP_SINGLE))
7454 *total = COSTS_N_INSNS (1);
7460 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7461 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7463 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7464 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7465 || subcode1 == ROTATE || subcode1 == ROTATERT
7466 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7467 || subcode1 == ASHIFTRT)
7469 /* It's just the cost of the two operands. */
7474 *total = COSTS_N_INSNS (1);
7478 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7482 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7483 && (mode == SFmode || !TARGET_VFP_SINGLE))
7485 *total = COSTS_N_INSNS (1);
7489 /* A shift as a part of ADD costs nothing. */
7490 if (GET_CODE (XEXP (x, 0)) == MULT
7491 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7493 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7494 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7495 *total += rtx_cost (XEXP (x, 1), code, false);
7500 case AND: case XOR: case IOR:
7503 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7505 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7506 || subcode == LSHIFTRT || subcode == ASHIFTRT
7507 || (code == AND && subcode == NOT))
7509 /* It's just the cost of the two operands. */
7515 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7519 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7523 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7524 && (mode == SFmode || !TARGET_VFP_SINGLE))
7526 *total = COSTS_N_INSNS (1);
7532 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7541 if (cc_register (XEXP (x, 0), VOIDmode))
7544 *total = COSTS_N_INSNS (1);
7548 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7549 && (mode == SFmode || !TARGET_VFP_SINGLE))
7550 *total = COSTS_N_INSNS (1);
7552 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7557 return arm_rtx_costs_1 (x, outer_code, total, 0);
7560 if (const_ok_for_arm (INTVAL (x)))
7561 /* A multiplication by a constant requires another instruction
7562 to load the constant to a register. */
7563 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7565 else if (const_ok_for_arm (~INTVAL (x)))
7566 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7567 else if (const_ok_for_arm (-INTVAL (x)))
7569 if (outer_code == COMPARE || outer_code == PLUS
7570 || outer_code == MINUS)
7573 *total = COSTS_N_INSNS (1);
7576 *total = COSTS_N_INSNS (2);
7582 *total = COSTS_N_INSNS (2);
7586 *total = COSTS_N_INSNS (4);
7591 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7592 cost of these slightly. */
7593 *total = COSTS_N_INSNS (1) + 1;
7597 if (mode != VOIDmode)
7598 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7600 *total = COSTS_N_INSNS (4); /* How knows? */
7605 /* RTX costs when optimizing for size. */
7607 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7611 return arm_size_rtx_costs (x, (enum rtx_code) code,
7612 (enum rtx_code) outer_code, total);
7614 return current_tune->rtx_costs (x, (enum rtx_code) code,
7615 (enum rtx_code) outer_code,
7619 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7620 supported on any "slowmul" cores, so it can be ignored. */
7623 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7624 int *total, bool speed)
7626 enum machine_mode mode = GET_MODE (x);
7630 *total = thumb1_rtx_costs (x, code, outer_code);
7637 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7640 *total = COSTS_N_INSNS (20);
7644 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7646 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7647 & (unsigned HOST_WIDE_INT) 0xffffffff);
7648 int cost, const_ok = const_ok_for_arm (i);
7649 int j, booth_unit_size;
7651 /* Tune as appropriate. */
7652 cost = const_ok ? 4 : 8;
7653 booth_unit_size = 2;
7654 for (j = 0; i && j < 32; j += booth_unit_size)
7656 i >>= booth_unit_size;
7660 *total = COSTS_N_INSNS (cost);
7661 *total += rtx_cost (XEXP (x, 0), code, speed);
7665 *total = COSTS_N_INSNS (20);
7669 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7674 /* RTX cost for cores with a fast multiply unit (M variants). */
7677 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7678 int *total, bool speed)
7680 enum machine_mode mode = GET_MODE (x);
7684 *total = thumb1_rtx_costs (x, code, outer_code);
7688 /* ??? should thumb2 use different costs? */
7692 /* There is no point basing this on the tuning, since it is always the
7693 fast variant if it exists at all. */
7695 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7696 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7697 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7699 *total = COSTS_N_INSNS(2);
7706 *total = COSTS_N_INSNS (5);
7710 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7712 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7713 & (unsigned HOST_WIDE_INT) 0xffffffff);
7714 int cost, const_ok = const_ok_for_arm (i);
7715 int j, booth_unit_size;
7717 /* Tune as appropriate. */
7718 cost = const_ok ? 4 : 8;
7719 booth_unit_size = 8;
7720 for (j = 0; i && j < 32; j += booth_unit_size)
7722 i >>= booth_unit_size;
7726 *total = COSTS_N_INSNS(cost);
7732 *total = COSTS_N_INSNS (4);
7736 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7738 if (TARGET_HARD_FLOAT
7740 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7742 *total = COSTS_N_INSNS (1);
7747 /* Requires a lib call */
7748 *total = COSTS_N_INSNS (20);
7752 return arm_rtx_costs_1 (x, outer_code, total, speed);
7757 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7758 so it can be ignored. */
7761 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7762 int *total, bool speed)
7764 enum machine_mode mode = GET_MODE (x);
7768 *total = thumb1_rtx_costs (x, code, outer_code);
7775 if (GET_CODE (XEXP (x, 0)) != MULT)
7776 return arm_rtx_costs_1 (x, outer_code, total, speed);
7778 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7779 will stall until the multiplication is complete. */
7780 *total = COSTS_N_INSNS (3);
7784 /* There is no point basing this on the tuning, since it is always the
7785 fast variant if it exists at all. */
7787 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7788 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7789 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7791 *total = COSTS_N_INSNS (2);
7798 *total = COSTS_N_INSNS (5);
7802 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7804 /* If operand 1 is a constant we can more accurately
7805 calculate the cost of the multiply. The multiplier can
7806 retire 15 bits on the first cycle and a further 12 on the
7807 second. We do, of course, have to load the constant into
7808 a register first. */
7809 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7810 /* There's a general overhead of one cycle. */
7812 unsigned HOST_WIDE_INT masked_const;
7817 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7819 masked_const = i & 0xffff8000;
7820 if (masked_const != 0)
7823 masked_const = i & 0xf8000000;
7824 if (masked_const != 0)
7827 *total = COSTS_N_INSNS (cost);
7833 *total = COSTS_N_INSNS (3);
7837 /* Requires a lib call */
7838 *total = COSTS_N_INSNS (20);
7842 return arm_rtx_costs_1 (x, outer_code, total, speed);
7847 /* RTX costs for 9e (and later) cores. */
7850 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7851 int *total, bool speed)
7853 enum machine_mode mode = GET_MODE (x);
7860 *total = COSTS_N_INSNS (3);
7864 *total = thumb1_rtx_costs (x, code, outer_code);
7872 /* There is no point basing this on the tuning, since it is always the
7873 fast variant if it exists at all. */
7875 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7876 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7877 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7879 *total = COSTS_N_INSNS (2);
7886 *total = COSTS_N_INSNS (5);
7892 *total = COSTS_N_INSNS (2);
7896 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7898 if (TARGET_HARD_FLOAT
7900 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7902 *total = COSTS_N_INSNS (1);
7907 *total = COSTS_N_INSNS (20);
7911 return arm_rtx_costs_1 (x, outer_code, total, speed);
7914 /* All address computations that can be done are free, but rtx cost returns
7915 the same for practically all of them. So we weight the different types
7916 of address here in the order (most pref first):
7917 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7919 arm_arm_address_cost (rtx x)
7921 enum rtx_code c = GET_CODE (x);
7923 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7925 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7930 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7933 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7943 arm_thumb_address_cost (rtx x)
7945 enum rtx_code c = GET_CODE (x);
7950 && GET_CODE (XEXP (x, 0)) == REG
7951 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7958 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7960 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7963 /* Adjust cost hook for XScale. */
7965 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7967 /* Some true dependencies can have a higher cost depending
7968 on precisely how certain input operands are used. */
7969 if (REG_NOTE_KIND(link) == 0
7970 && recog_memoized (insn) >= 0
7971 && recog_memoized (dep) >= 0)
7973 int shift_opnum = get_attr_shift (insn);
7974 enum attr_type attr_type = get_attr_type (dep);
7976 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7977 operand for INSN. If we have a shifted input operand and the
7978 instruction we depend on is another ALU instruction, then we may
7979 have to account for an additional stall. */
7980 if (shift_opnum != 0
7981 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7983 rtx shifted_operand;
7986 /* Get the shifted operand. */
7987 extract_insn (insn);
7988 shifted_operand = recog_data.operand[shift_opnum];
7990 /* Iterate over all the operands in DEP. If we write an operand
7991 that overlaps with SHIFTED_OPERAND, then we have increase the
7992 cost of this dependency. */
7994 preprocess_constraints ();
7995 for (opno = 0; opno < recog_data.n_operands; opno++)
7997 /* We can ignore strict inputs. */
7998 if (recog_data.operand_type[opno] == OP_IN)
8001 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8013 /* Adjust cost hook for Cortex A9. */
8015 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8017 switch (REG_NOTE_KIND (link))
8024 case REG_DEP_OUTPUT:
8025 if (recog_memoized (insn) >= 0
8026 && recog_memoized (dep) >= 0)
8028 if (GET_CODE (PATTERN (insn)) == SET)
8031 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8033 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8035 enum attr_type attr_type_insn = get_attr_type (insn);
8036 enum attr_type attr_type_dep = get_attr_type (dep);
8038 /* By default all dependencies of the form
8041 have an extra latency of 1 cycle because
8042 of the input and output dependency in this
8043 case. However this gets modeled as an true
8044 dependency and hence all these checks. */
8045 if (REG_P (SET_DEST (PATTERN (insn)))
8046 && REG_P (SET_DEST (PATTERN (dep)))
8047 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8048 SET_DEST (PATTERN (dep))))
8050 /* FMACS is a special case where the dependant
8051 instruction can be issued 3 cycles before
8052 the normal latency in case of an output
8054 if ((attr_type_insn == TYPE_FMACS
8055 || attr_type_insn == TYPE_FMACD)
8056 && (attr_type_dep == TYPE_FMACS
8057 || attr_type_dep == TYPE_FMACD))
8059 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8060 *cost = insn_default_latency (dep) - 3;
8062 *cost = insn_default_latency (dep);
8067 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8068 *cost = insn_default_latency (dep) + 1;
8070 *cost = insn_default_latency (dep);
8086 /* Adjust cost hook for FA726TE. */
8088 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8090 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
8091 have penalty of 3. */
8092 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8093 && recog_memoized (insn) >= 0
8094 && recog_memoized (dep) >= 0
8095 && get_attr_conds (dep) == CONDS_SET)
8097 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8098 if (get_attr_conds (insn) == CONDS_USE
8099 && get_attr_type (insn) != TYPE_BRANCH)
8105 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8106 || get_attr_conds (insn) == CONDS_USE)
8116 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8117 It corrects the value of COST based on the relationship between
8118 INSN and DEP through the dependence LINK. It returns the new
8119 value. There is a per-core adjust_cost hook to adjust scheduler costs
8120 and the per-core hook can choose to completely override the generic
8121 adjust_cost function. Only put bits of code into arm_adjust_cost that
8122 are common across all cores. */
8124 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8128 /* When generating Thumb-1 code, we want to place flag-setting operations
8129 close to a conditional branch which depends on them, so that we can
8130 omit the comparison. */
8132 && REG_NOTE_KIND (link) == 0
8133 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8134 && recog_memoized (dep) >= 0
8135 && get_attr_conds (dep) == CONDS_SET)
8138 if (current_tune->sched_adjust_cost != NULL)
8140 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8144 /* XXX This is not strictly true for the FPA. */
8145 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8146 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8149 /* Call insns don't incur a stall, even if they follow a load. */
8150 if (REG_NOTE_KIND (link) == 0
8151 && GET_CODE (insn) == CALL_INSN)
8154 if ((i_pat = single_set (insn)) != NULL
8155 && GET_CODE (SET_SRC (i_pat)) == MEM
8156 && (d_pat = single_set (dep)) != NULL
8157 && GET_CODE (SET_DEST (d_pat)) == MEM)
8159 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8160 /* This is a load after a store, there is no conflict if the load reads
8161 from a cached area. Assume that loads from the stack, and from the
8162 constant pool are cached, and that others will miss. This is a
8165 if ((GET_CODE (src_mem) == SYMBOL_REF
8166 && CONSTANT_POOL_ADDRESS_P (src_mem))
8167 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8168 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8169 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8176 static int fp_consts_inited = 0;
8178 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8179 static const char * const strings_fp[8] =
8182 "4", "5", "0.5", "10"
8185 static REAL_VALUE_TYPE values_fp[8];
8188 init_fp_table (void)
8194 fp_consts_inited = 1;
8196 fp_consts_inited = 8;
8198 for (i = 0; i < fp_consts_inited; i++)
8200 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8205 /* Return TRUE if rtx X is a valid immediate FP constant. */
8207 arm_const_double_rtx (rtx x)
8212 if (!fp_consts_inited)
8215 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8216 if (REAL_VALUE_MINUS_ZERO (r))
8219 for (i = 0; i < fp_consts_inited; i++)
8220 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8226 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8228 neg_const_double_rtx_ok_for_fpa (rtx x)
8233 if (!fp_consts_inited)
8236 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8237 r = real_value_negate (&r);
8238 if (REAL_VALUE_MINUS_ZERO (r))
8241 for (i = 0; i < 8; i++)
8242 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8249 /* VFPv3 has a fairly wide range of representable immediates, formed from
8250 "quarter-precision" floating-point values. These can be evaluated using this
8251 formula (with ^ for exponentiation):
8255 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8256 16 <= n <= 31 and 0 <= r <= 7.
8258 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8260 - A (most-significant) is the sign bit.
8261 - BCD are the exponent (encoded as r XOR 3).
8262 - EFGH are the mantissa (encoded as n - 16).
8265 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8266 fconst[sd] instruction, or -1 if X isn't suitable. */
8268 vfp3_const_double_index (rtx x)
8270 REAL_VALUE_TYPE r, m;
8272 unsigned HOST_WIDE_INT mantissa, mant_hi;
8273 unsigned HOST_WIDE_INT mask;
8274 HOST_WIDE_INT m1, m2;
8275 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8277 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8280 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8282 /* We can't represent these things, so detect them first. */
8283 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8286 /* Extract sign, exponent and mantissa. */
8287 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8288 r = real_value_abs (&r);
8289 exponent = REAL_EXP (&r);
8290 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8291 highest (sign) bit, with a fixed binary point at bit point_pos.
8292 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8293 bits for the mantissa, this may fail (low bits would be lost). */
8294 real_ldexp (&m, &r, point_pos - exponent);
8295 REAL_VALUE_TO_INT (&m1, &m2, m);
8299 /* If there are bits set in the low part of the mantissa, we can't
8300 represent this value. */
8304 /* Now make it so that mantissa contains the most-significant bits, and move
8305 the point_pos to indicate that the least-significant bits have been
8307 point_pos -= HOST_BITS_PER_WIDE_INT;
8310 /* We can permit four significant bits of mantissa only, plus a high bit
8311 which is always 1. */
8312 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8313 if ((mantissa & mask) != 0)
8316 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8317 mantissa >>= point_pos - 5;
8319 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8320 floating-point immediate zero with Neon using an integer-zero load, but
8321 that case is handled elsewhere.) */
8325 gcc_assert (mantissa >= 16 && mantissa <= 31);
8327 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8328 normalized significands are in the range [1, 2). (Our mantissa is shifted
8329 left 4 places at this point relative to normalized IEEE754 values). GCC
8330 internally uses [0.5, 1) (see real.c), so the exponent returned from
8331 REAL_EXP must be altered. */
8332 exponent = 5 - exponent;
8334 if (exponent < 0 || exponent > 7)
8337 /* Sign, mantissa and exponent are now in the correct form to plug into the
8338 formula described in the comment above. */
8339 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8342 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8344 vfp3_const_double_rtx (rtx x)
8349 return vfp3_const_double_index (x) != -1;
8352 /* Recognize immediates which can be used in various Neon instructions. Legal
8353 immediates are described by the following table (for VMVN variants, the
8354 bitwise inverse of the constant shown is recognized. In either case, VMOV
8355 is output and the correct instruction to use for a given constant is chosen
8356 by the assembler). The constant shown is replicated across all elements of
8357 the destination vector.
8359 insn elems variant constant (binary)
8360 ---- ----- ------- -----------------
8361 vmov i32 0 00000000 00000000 00000000 abcdefgh
8362 vmov i32 1 00000000 00000000 abcdefgh 00000000
8363 vmov i32 2 00000000 abcdefgh 00000000 00000000
8364 vmov i32 3 abcdefgh 00000000 00000000 00000000
8365 vmov i16 4 00000000 abcdefgh
8366 vmov i16 5 abcdefgh 00000000
8367 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8368 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8369 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8370 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8371 vmvn i16 10 00000000 abcdefgh
8372 vmvn i16 11 abcdefgh 00000000
8373 vmov i32 12 00000000 00000000 abcdefgh 11111111
8374 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8375 vmov i32 14 00000000 abcdefgh 11111111 11111111
8376 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8378 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8379 eeeeeeee ffffffff gggggggg hhhhhhhh
8380 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8382 For case 18, B = !b. Representable values are exactly those accepted by
8383 vfp3_const_double_index, but are output as floating-point numbers rather
8386 Variants 0-5 (inclusive) may also be used as immediates for the second
8387 operand of VORR/VBIC instructions.
8389 The INVERSE argument causes the bitwise inverse of the given operand to be
8390 recognized instead (used for recognizing legal immediates for the VAND/VORN
8391 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8392 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8393 output, rather than the real insns vbic/vorr).
8395 INVERSE makes no difference to the recognition of float vectors.
8397 The return value is the variant of immediate as shown in the above table, or
8398 -1 if the given value doesn't match any of the listed patterns.
8401 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8402 rtx *modconst, int *elementwidth)
8404 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8406 for (i = 0; i < idx; i += (STRIDE)) \
8411 immtype = (CLASS); \
8412 elsize = (ELSIZE); \
8416 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8417 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8418 unsigned char bytes[16];
8419 int immtype = -1, matches;
8420 unsigned int invmask = inverse ? 0xff : 0;
8422 /* Vectors of float constants. */
8423 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8425 rtx el0 = CONST_VECTOR_ELT (op, 0);
8428 if (!vfp3_const_double_rtx (el0))
8431 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8433 for (i = 1; i < n_elts; i++)
8435 rtx elt = CONST_VECTOR_ELT (op, i);
8438 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8440 if (!REAL_VALUES_EQUAL (r0, re))
8445 *modconst = CONST_VECTOR_ELT (op, 0);
8453 /* Splat vector constant out into a byte vector. */
8454 for (i = 0; i < n_elts; i++)
8456 rtx el = CONST_VECTOR_ELT (op, i);
8457 unsigned HOST_WIDE_INT elpart;
8458 unsigned int part, parts;
8460 if (GET_CODE (el) == CONST_INT)
8462 elpart = INTVAL (el);
8465 else if (GET_CODE (el) == CONST_DOUBLE)
8467 elpart = CONST_DOUBLE_LOW (el);
8473 for (part = 0; part < parts; part++)
8476 for (byte = 0; byte < innersize; byte++)
8478 bytes[idx++] = (elpart & 0xff) ^ invmask;
8479 elpart >>= BITS_PER_UNIT;
8481 if (GET_CODE (el) == CONST_DOUBLE)
8482 elpart = CONST_DOUBLE_HIGH (el);
8487 gcc_assert (idx == GET_MODE_SIZE (mode));
8491 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8492 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8494 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8495 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8497 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8498 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8500 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8501 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8503 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8505 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8507 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8508 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8510 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8511 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8513 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8514 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8516 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8517 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8519 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8521 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8523 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8524 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8526 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8527 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8529 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8530 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8532 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8533 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8535 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8537 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8538 && bytes[i] == bytes[(i + 8) % idx]);
8546 *elementwidth = elsize;
8550 unsigned HOST_WIDE_INT imm = 0;
8552 /* Un-invert bytes of recognized vector, if necessary. */
8554 for (i = 0; i < idx; i++)
8555 bytes[i] ^= invmask;
8559 /* FIXME: Broken on 32-bit H_W_I hosts. */
8560 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8562 for (i = 0; i < 8; i++)
8563 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8564 << (i * BITS_PER_UNIT);
8566 *modconst = GEN_INT (imm);
8570 unsigned HOST_WIDE_INT imm = 0;
8572 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8573 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8575 *modconst = GEN_INT (imm);
8583 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8584 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8585 float elements), and a modified constant (whatever should be output for a
8586 VMOV) in *MODCONST. */
8589 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8590 rtx *modconst, int *elementwidth)
8594 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8600 *modconst = tmpconst;
8603 *elementwidth = tmpwidth;
8608 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8609 the immediate is valid, write a constant suitable for using as an operand
8610 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8611 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8614 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8615 rtx *modconst, int *elementwidth)
8619 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8621 if (retval < 0 || retval > 5)
8625 *modconst = tmpconst;
8628 *elementwidth = tmpwidth;
8633 /* Return a string suitable for output of Neon immediate logic operation
8637 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8638 int inverse, int quad)
8640 int width, is_valid;
8641 static char templ[40];
8643 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8645 gcc_assert (is_valid != 0);
8648 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8650 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8655 /* Output a sequence of pairwise operations to implement a reduction.
8656 NOTE: We do "too much work" here, because pairwise operations work on two
8657 registers-worth of operands in one go. Unfortunately we can't exploit those
8658 extra calculations to do the full operation in fewer steps, I don't think.
8659 Although all vector elements of the result but the first are ignored, we
8660 actually calculate the same result in each of the elements. An alternative
8661 such as initially loading a vector with zero to use as each of the second
8662 operands would use up an additional register and take an extra instruction,
8663 for no particular gain. */
8666 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8667 rtx (*reduc) (rtx, rtx, rtx))
8669 enum machine_mode inner = GET_MODE_INNER (mode);
8670 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8673 for (i = parts / 2; i >= 1; i /= 2)
8675 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8676 emit_insn (reduc (dest, tmpsum, tmpsum));
8681 /* If VALS is a vector constant that can be loaded into a register
8682 using VDUP, generate instructions to do so and return an RTX to
8683 assign to the register. Otherwise return NULL_RTX. */
8686 neon_vdup_constant (rtx vals)
8688 enum machine_mode mode = GET_MODE (vals);
8689 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8690 int n_elts = GET_MODE_NUNITS (mode);
8691 bool all_same = true;
8695 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8698 for (i = 0; i < n_elts; ++i)
8700 x = XVECEXP (vals, 0, i);
8701 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8706 /* The elements are not all the same. We could handle repeating
8707 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8708 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8712 /* We can load this constant by using VDUP and a constant in a
8713 single ARM register. This will be cheaper than a vector
8716 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8717 return gen_rtx_VEC_DUPLICATE (mode, x);
8720 /* Generate code to load VALS, which is a PARALLEL containing only
8721 constants (for vec_init) or CONST_VECTOR, efficiently into a
8722 register. Returns an RTX to copy into the register, or NULL_RTX
8723 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8726 neon_make_constant (rtx vals)
8728 enum machine_mode mode = GET_MODE (vals);
8730 rtx const_vec = NULL_RTX;
8731 int n_elts = GET_MODE_NUNITS (mode);
8735 if (GET_CODE (vals) == CONST_VECTOR)
8737 else if (GET_CODE (vals) == PARALLEL)
8739 /* A CONST_VECTOR must contain only CONST_INTs and
8740 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8741 Only store valid constants in a CONST_VECTOR. */
8742 for (i = 0; i < n_elts; ++i)
8744 rtx x = XVECEXP (vals, 0, i);
8745 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8748 if (n_const == n_elts)
8749 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8754 if (const_vec != NULL
8755 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8756 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8758 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8759 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8760 pipeline cycle; creating the constant takes one or two ARM
8763 else if (const_vec != NULL_RTX)
8764 /* Load from constant pool. On Cortex-A8 this takes two cycles
8765 (for either double or quad vectors). We can not take advantage
8766 of single-cycle VLD1 because we need a PC-relative addressing
8770 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8771 We can not construct an initializer. */
8775 /* Initialize vector TARGET to VALS. */
8778 neon_expand_vector_init (rtx target, rtx vals)
8780 enum machine_mode mode = GET_MODE (target);
8781 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8782 int n_elts = GET_MODE_NUNITS (mode);
8783 int n_var = 0, one_var = -1;
8784 bool all_same = true;
8788 for (i = 0; i < n_elts; ++i)
8790 x = XVECEXP (vals, 0, i);
8791 if (!CONSTANT_P (x))
8792 ++n_var, one_var = i;
8794 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8800 rtx constant = neon_make_constant (vals);
8801 if (constant != NULL_RTX)
8803 emit_move_insn (target, constant);
8808 /* Splat a single non-constant element if we can. */
8809 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8811 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8812 emit_insn (gen_rtx_SET (VOIDmode, target,
8813 gen_rtx_VEC_DUPLICATE (mode, x)));
8817 /* One field is non-constant. Load constant then overwrite varying
8818 field. This is more efficient than using the stack. */
8821 rtx copy = copy_rtx (vals);
8822 rtx index = GEN_INT (one_var);
8824 /* Load constant part of vector, substitute neighboring value for
8826 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8827 neon_expand_vector_init (target, copy);
8829 /* Insert variable. */
8830 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8834 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8837 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8840 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8843 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8846 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8849 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8852 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8855 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8858 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8866 /* Construct the vector in memory one field at a time
8867 and load the whole vector. */
8868 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8869 for (i = 0; i < n_elts; i++)
8870 emit_move_insn (adjust_address_nv (mem, inner_mode,
8871 i * GET_MODE_SIZE (inner_mode)),
8872 XVECEXP (vals, 0, i));
8873 emit_move_insn (target, mem);
8876 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8877 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8878 reported source locations are bogus. */
8881 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8886 gcc_assert (GET_CODE (operand) == CONST_INT);
8888 lane = INTVAL (operand);
8890 if (lane < low || lane >= high)
8894 /* Bounds-check lanes. */
8897 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8899 bounds_check (operand, low, high, "lane out of range");
8902 /* Bounds-check constants. */
8905 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8907 bounds_check (operand, low, high, "constant out of range");
8911 neon_element_bits (enum machine_mode mode)
8914 return GET_MODE_BITSIZE (mode);
8916 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8920 /* Predicates for `match_operand' and `match_operator'. */
8922 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8924 cirrus_memory_offset (rtx op)
8926 /* Reject eliminable registers. */
8927 if (! (reload_in_progress || reload_completed)
8928 && ( reg_mentioned_p (frame_pointer_rtx, op)
8929 || reg_mentioned_p (arg_pointer_rtx, op)
8930 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8931 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8932 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8933 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8936 if (GET_CODE (op) == MEM)
8942 /* Match: (mem (reg)). */
8943 if (GET_CODE (ind) == REG)
8949 if (GET_CODE (ind) == PLUS
8950 && GET_CODE (XEXP (ind, 0)) == REG
8951 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8952 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8959 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8960 WB is true if full writeback address modes are allowed and is false
8961 if limited writeback address modes (POST_INC and PRE_DEC) are
8965 arm_coproc_mem_operand (rtx op, bool wb)
8969 /* Reject eliminable registers. */
8970 if (! (reload_in_progress || reload_completed)
8971 && ( reg_mentioned_p (frame_pointer_rtx, op)
8972 || reg_mentioned_p (arg_pointer_rtx, op)
8973 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8974 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8975 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8976 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8979 /* Constants are converted into offsets from labels. */
8980 if (GET_CODE (op) != MEM)
8985 if (reload_completed
8986 && (GET_CODE (ind) == LABEL_REF
8987 || (GET_CODE (ind) == CONST
8988 && GET_CODE (XEXP (ind, 0)) == PLUS
8989 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8990 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8993 /* Match: (mem (reg)). */
8994 if (GET_CODE (ind) == REG)
8995 return arm_address_register_rtx_p (ind, 0);
8997 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8998 acceptable in any case (subject to verification by
8999 arm_address_register_rtx_p). We need WB to be true to accept
9000 PRE_INC and POST_DEC. */
9001 if (GET_CODE (ind) == POST_INC
9002 || GET_CODE (ind) == PRE_DEC
9004 && (GET_CODE (ind) == PRE_INC
9005 || GET_CODE (ind) == POST_DEC)))
9006 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9009 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9010 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9011 && GET_CODE (XEXP (ind, 1)) == PLUS
9012 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9013 ind = XEXP (ind, 1);
9018 if (GET_CODE (ind) == PLUS
9019 && GET_CODE (XEXP (ind, 0)) == REG
9020 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9021 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9022 && INTVAL (XEXP (ind, 1)) > -1024
9023 && INTVAL (XEXP (ind, 1)) < 1024
9024 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9030 /* Return TRUE if OP is a memory operand which we can load or store a vector
9031 to/from. TYPE is one of the following values:
9032 0 - Vector load/stor (vldr)
9033 1 - Core registers (ldm)
9034 2 - Element/structure loads (vld1)
9037 neon_vector_mem_operand (rtx op, int type)
9041 /* Reject eliminable registers. */
9042 if (! (reload_in_progress || reload_completed)
9043 && ( reg_mentioned_p (frame_pointer_rtx, op)
9044 || reg_mentioned_p (arg_pointer_rtx, op)
9045 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9046 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9047 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9048 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9051 /* Constants are converted into offsets from labels. */
9052 if (GET_CODE (op) != MEM)
9057 if (reload_completed
9058 && (GET_CODE (ind) == LABEL_REF
9059 || (GET_CODE (ind) == CONST
9060 && GET_CODE (XEXP (ind, 0)) == PLUS
9061 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9062 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9065 /* Match: (mem (reg)). */
9066 if (GET_CODE (ind) == REG)
9067 return arm_address_register_rtx_p (ind, 0);
9069 /* Allow post-increment with Neon registers. */
9070 if ((type != 1 && GET_CODE (ind) == POST_INC)
9071 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9072 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9074 /* FIXME: vld1 allows register post-modify. */
9080 && GET_CODE (ind) == PLUS
9081 && GET_CODE (XEXP (ind, 0)) == REG
9082 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9083 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9084 && INTVAL (XEXP (ind, 1)) > -1024
9085 && INTVAL (XEXP (ind, 1)) < 1016
9086 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9092 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9095 neon_struct_mem_operand (rtx op)
9099 /* Reject eliminable registers. */
9100 if (! (reload_in_progress || reload_completed)
9101 && ( reg_mentioned_p (frame_pointer_rtx, op)
9102 || reg_mentioned_p (arg_pointer_rtx, op)
9103 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9104 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9105 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9106 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9109 /* Constants are converted into offsets from labels. */
9110 if (GET_CODE (op) != MEM)
9115 if (reload_completed
9116 && (GET_CODE (ind) == LABEL_REF
9117 || (GET_CODE (ind) == CONST
9118 && GET_CODE (XEXP (ind, 0)) == PLUS
9119 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9120 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9123 /* Match: (mem (reg)). */
9124 if (GET_CODE (ind) == REG)
9125 return arm_address_register_rtx_p (ind, 0);
9130 /* Return true if X is a register that will be eliminated later on. */
9132 arm_eliminable_register (rtx x)
9134 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9135 || REGNO (x) == ARG_POINTER_REGNUM
9136 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9137 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9140 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9141 coprocessor registers. Otherwise return NO_REGS. */
9144 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9148 if (!TARGET_NEON_FP16)
9149 return GENERAL_REGS;
9150 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9152 return GENERAL_REGS;
9155 /* The neon move patterns handle all legitimate vector and struct
9159 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9160 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9161 || VALID_NEON_STRUCT_MODE (mode)))
9164 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9167 return GENERAL_REGS;
9170 /* Values which must be returned in the most-significant end of the return
9174 arm_return_in_msb (const_tree valtype)
9176 return (TARGET_AAPCS_BASED
9178 && (AGGREGATE_TYPE_P (valtype)
9179 || TREE_CODE (valtype) == COMPLEX_TYPE));
9182 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9183 Use by the Cirrus Maverick code which has to workaround
9184 a hardware bug triggered by such instructions. */
9186 arm_memory_load_p (rtx insn)
9188 rtx body, lhs, rhs;;
9190 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9193 body = PATTERN (insn);
9195 if (GET_CODE (body) != SET)
9198 lhs = XEXP (body, 0);
9199 rhs = XEXP (body, 1);
9201 lhs = REG_OR_SUBREG_RTX (lhs);
9203 /* If the destination is not a general purpose
9204 register we do not have to worry. */
9205 if (GET_CODE (lhs) != REG
9206 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9209 /* As well as loads from memory we also have to react
9210 to loads of invalid constants which will be turned
9211 into loads from the minipool. */
9212 return (GET_CODE (rhs) == MEM
9213 || GET_CODE (rhs) == SYMBOL_REF
9214 || note_invalid_constants (insn, -1, false));
9217 /* Return TRUE if INSN is a Cirrus instruction. */
9219 arm_cirrus_insn_p (rtx insn)
9221 enum attr_cirrus attr;
9223 /* get_attr cannot accept USE or CLOBBER. */
9225 || GET_CODE (insn) != INSN
9226 || GET_CODE (PATTERN (insn)) == USE
9227 || GET_CODE (PATTERN (insn)) == CLOBBER)
9230 attr = get_attr_cirrus (insn);
9232 return attr != CIRRUS_NOT;
9235 /* Cirrus reorg for invalid instruction combinations. */
9237 cirrus_reorg (rtx first)
9239 enum attr_cirrus attr;
9240 rtx body = PATTERN (first);
9244 /* Any branch must be followed by 2 non Cirrus instructions. */
9245 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9248 t = next_nonnote_insn (first);
9250 if (arm_cirrus_insn_p (t))
9253 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9257 emit_insn_after (gen_nop (), first);
9262 /* (float (blah)) is in parallel with a clobber. */
9263 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9264 body = XVECEXP (body, 0, 0);
9266 if (GET_CODE (body) == SET)
9268 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9270 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9271 be followed by a non Cirrus insn. */
9272 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9274 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9275 emit_insn_after (gen_nop (), first);
9279 else if (arm_memory_load_p (first))
9281 unsigned int arm_regno;
9283 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9284 ldr/cfmv64hr combination where the Rd field is the same
9285 in both instructions must be split with a non Cirrus
9292 /* Get Arm register number for ldr insn. */
9293 if (GET_CODE (lhs) == REG)
9294 arm_regno = REGNO (lhs);
9297 gcc_assert (GET_CODE (rhs) == REG);
9298 arm_regno = REGNO (rhs);
9302 first = next_nonnote_insn (first);
9304 if (! arm_cirrus_insn_p (first))
9307 body = PATTERN (first);
9309 /* (float (blah)) is in parallel with a clobber. */
9310 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9311 body = XVECEXP (body, 0, 0);
9313 if (GET_CODE (body) == FLOAT)
9314 body = XEXP (body, 0);
9316 if (get_attr_cirrus (first) == CIRRUS_MOVE
9317 && GET_CODE (XEXP (body, 1)) == REG
9318 && arm_regno == REGNO (XEXP (body, 1)))
9319 emit_insn_after (gen_nop (), first);
9325 /* get_attr cannot accept USE or CLOBBER. */
9327 || GET_CODE (first) != INSN
9328 || GET_CODE (PATTERN (first)) == USE
9329 || GET_CODE (PATTERN (first)) == CLOBBER)
9332 attr = get_attr_cirrus (first);
9334 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9335 must be followed by a non-coprocessor instruction. */
9336 if (attr == CIRRUS_COMPARE)
9340 t = next_nonnote_insn (first);
9342 if (arm_cirrus_insn_p (t))
9345 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9349 emit_insn_after (gen_nop (), first);
9355 /* Return TRUE if X references a SYMBOL_REF. */
9357 symbol_mentioned_p (rtx x)
9362 if (GET_CODE (x) == SYMBOL_REF)
9365 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9366 are constant offsets, not symbols. */
9367 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9370 fmt = GET_RTX_FORMAT (GET_CODE (x));
9372 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9378 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9379 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9382 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9389 /* Return TRUE if X references a LABEL_REF. */
9391 label_mentioned_p (rtx x)
9396 if (GET_CODE (x) == LABEL_REF)
9399 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9400 instruction, but they are constant offsets, not symbols. */
9401 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9404 fmt = GET_RTX_FORMAT (GET_CODE (x));
9405 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9411 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9412 if (label_mentioned_p (XVECEXP (x, i, j)))
9415 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9423 tls_mentioned_p (rtx x)
9425 switch (GET_CODE (x))
9428 return tls_mentioned_p (XEXP (x, 0));
9431 if (XINT (x, 1) == UNSPEC_TLS)
9439 /* Must not copy any rtx that uses a pc-relative address. */
9442 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
9444 if (GET_CODE (*x) == UNSPEC
9445 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9451 arm_cannot_copy_insn_p (rtx insn)
9453 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9459 enum rtx_code code = GET_CODE (x);
9476 /* Return 1 if memory locations are adjacent. */
9478 adjacent_mem_locations (rtx a, rtx b)
9480 /* We don't guarantee to preserve the order of these memory refs. */
9481 if (volatile_refs_p (a) || volatile_refs_p (b))
9484 if ((GET_CODE (XEXP (a, 0)) == REG
9485 || (GET_CODE (XEXP (a, 0)) == PLUS
9486 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9487 && (GET_CODE (XEXP (b, 0)) == REG
9488 || (GET_CODE (XEXP (b, 0)) == PLUS
9489 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9491 HOST_WIDE_INT val0 = 0, val1 = 0;
9495 if (GET_CODE (XEXP (a, 0)) == PLUS)
9497 reg0 = XEXP (XEXP (a, 0), 0);
9498 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9503 if (GET_CODE (XEXP (b, 0)) == PLUS)
9505 reg1 = XEXP (XEXP (b, 0), 0);
9506 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9511 /* Don't accept any offset that will require multiple
9512 instructions to handle, since this would cause the
9513 arith_adjacentmem pattern to output an overlong sequence. */
9514 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9517 /* Don't allow an eliminable register: register elimination can make
9518 the offset too large. */
9519 if (arm_eliminable_register (reg0))
9522 val_diff = val1 - val0;
9526 /* If the target has load delay slots, then there's no benefit
9527 to using an ldm instruction unless the offset is zero and
9528 we are optimizing for size. */
9529 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9530 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9531 && (val_diff == 4 || val_diff == -4));
9534 return ((REGNO (reg0) == REGNO (reg1))
9535 && (val_diff == 4 || val_diff == -4));
9541 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9542 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9543 instruction. ADD_OFFSET is nonzero if the base address register needs
9544 to be modified with an add instruction before we can use it. */
9547 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9548 int nops, HOST_WIDE_INT add_offset)
9550 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9551 if the offset isn't small enough. The reason 2 ldrs are faster
9552 is because these ARMs are able to do more than one cache access
9553 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9554 whilst the ARM8 has a double bandwidth cache. This means that
9555 these cores can do both an instruction fetch and a data fetch in
9556 a single cycle, so the trick of calculating the address into a
9557 scratch register (one of the result regs) and then doing a load
9558 multiple actually becomes slower (and no smaller in code size).
9559 That is the transformation
9561 ldr rd1, [rbase + offset]
9562 ldr rd2, [rbase + offset + 4]
9566 add rd1, rbase, offset
9567 ldmia rd1, {rd1, rd2}
9569 produces worse code -- '3 cycles + any stalls on rd2' instead of
9570 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9571 access per cycle, the first sequence could never complete in less
9572 than 6 cycles, whereas the ldm sequence would only take 5 and
9573 would make better use of sequential accesses if not hitting the
9576 We cheat here and test 'arm_ld_sched' which we currently know to
9577 only be true for the ARM8, ARM9 and StrongARM. If this ever
9578 changes, then the test below needs to be reworked. */
9579 if (nops == 2 && arm_ld_sched && add_offset != 0)
9582 /* XScale has load-store double instructions, but they have stricter
9583 alignment requirements than load-store multiple, so we cannot
9586 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9587 the pipeline until completion.
9595 An ldr instruction takes 1-3 cycles, but does not block the
9604 Best case ldr will always win. However, the more ldr instructions
9605 we issue, the less likely we are to be able to schedule them well.
9606 Using ldr instructions also increases code size.
9608 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9609 for counts of 3 or 4 regs. */
9610 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9615 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9616 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9617 an array ORDER which describes the sequence to use when accessing the
9618 offsets that produces an ascending order. In this sequence, each
9619 offset must be larger by exactly 4 than the previous one. ORDER[0]
9620 must have been filled in with the lowest offset by the caller.
9621 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9622 we use to verify that ORDER produces an ascending order of registers.
9623 Return true if it was possible to construct such an order, false if
9627 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9631 for (i = 1; i < nops; i++)
9635 order[i] = order[i - 1];
9636 for (j = 0; j < nops; j++)
9637 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9639 /* We must find exactly one offset that is higher than the
9640 previous one by 4. */
9641 if (order[i] != order[i - 1])
9645 if (order[i] == order[i - 1])
9647 /* The register numbers must be ascending. */
9648 if (unsorted_regs != NULL
9649 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9655 /* Used to determine in a peephole whether a sequence of load
9656 instructions can be changed into a load-multiple instruction.
9657 NOPS is the number of separate load instructions we are examining. The
9658 first NOPS entries in OPERANDS are the destination registers, the
9659 next NOPS entries are memory operands. If this function is
9660 successful, *BASE is set to the common base register of the memory
9661 accesses; *LOAD_OFFSET is set to the first memory location's offset
9662 from that base register.
9663 REGS is an array filled in with the destination register numbers.
9664 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9665 insn numbers to to an ascending order of stores. If CHECK_REGS is true,
9666 the sequence of registers in REGS matches the loads from ascending memory
9667 locations, and the function verifies that the register numbers are
9668 themselves ascending. If CHECK_REGS is false, the register numbers
9669 are stored in the order they are found in the operands. */
9671 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9672 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9674 int unsorted_regs[MAX_LDM_STM_OPS];
9675 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9676 int order[MAX_LDM_STM_OPS];
9677 rtx base_reg_rtx = NULL;
9681 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9682 easily extended if required. */
9683 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9685 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9687 /* Loop over the operands and check that the memory references are
9688 suitable (i.e. immediate offsets from the same base register). At
9689 the same time, extract the target register, and the memory
9691 for (i = 0; i < nops; i++)
9696 /* Convert a subreg of a mem into the mem itself. */
9697 if (GET_CODE (operands[nops + i]) == SUBREG)
9698 operands[nops + i] = alter_subreg (operands + (nops + i));
9700 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9702 /* Don't reorder volatile memory references; it doesn't seem worth
9703 looking for the case where the order is ok anyway. */
9704 if (MEM_VOLATILE_P (operands[nops + i]))
9707 offset = const0_rtx;
9709 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9710 || (GET_CODE (reg) == SUBREG
9711 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9712 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9713 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9715 || (GET_CODE (reg) == SUBREG
9716 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9717 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9722 base_reg = REGNO (reg);
9724 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9727 else if (base_reg != (int) REGNO (reg))
9728 /* Not addressed from the same base register. */
9731 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9732 ? REGNO (operands[i])
9733 : REGNO (SUBREG_REG (operands[i])));
9735 /* If it isn't an integer register, or if it overwrites the
9736 base register but isn't the last insn in the list, then
9737 we can't do this. */
9738 if (unsorted_regs[i] < 0
9739 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9740 || unsorted_regs[i] > 14
9741 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9744 unsorted_offsets[i] = INTVAL (offset);
9745 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9749 /* Not a suitable memory address. */
9753 /* All the useful information has now been extracted from the
9754 operands into unsorted_regs and unsorted_offsets; additionally,
9755 order[0] has been set to the lowest offset in the list. Sort
9756 the offsets into order, verifying that they are adjacent, and
9757 check that the register numbers are ascending. */
9758 if (!compute_offset_order (nops, unsorted_offsets, order,
9759 check_regs ? unsorted_regs : NULL))
9763 memcpy (saved_order, order, sizeof order);
9769 for (i = 0; i < nops; i++)
9770 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9772 *load_offset = unsorted_offsets[order[0]];
9776 && !peep2_reg_dead_p (nops, base_reg_rtx))
9779 if (unsorted_offsets[order[0]] == 0)
9780 ldm_case = 1; /* ldmia */
9781 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9782 ldm_case = 2; /* ldmib */
9783 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9784 ldm_case = 3; /* ldmda */
9785 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9786 ldm_case = 4; /* ldmdb */
9787 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9788 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9793 if (!multiple_operation_profitable_p (false, nops,
9795 ? unsorted_offsets[order[0]] : 0))
9801 /* Used to determine in a peephole whether a sequence of store instructions can
9802 be changed into a store-multiple instruction.
9803 NOPS is the number of separate store instructions we are examining.
9804 NOPS_TOTAL is the total number of instructions recognized by the peephole
9806 The first NOPS entries in OPERANDS are the source registers, the next
9807 NOPS entries are memory operands. If this function is successful, *BASE is
9808 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9809 to the first memory location's offset from that base register. REGS is an
9810 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9811 likewise filled with the corresponding rtx's.
9812 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
9813 numbers to to an ascending order of stores.
9814 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9815 from ascending memory locations, and the function verifies that the register
9816 numbers are themselves ascending. If CHECK_REGS is false, the register
9817 numbers are stored in the order they are found in the operands. */
9819 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9820 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9821 HOST_WIDE_INT *load_offset, bool check_regs)
9823 int unsorted_regs[MAX_LDM_STM_OPS];
9824 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9825 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9826 int order[MAX_LDM_STM_OPS];
9828 rtx base_reg_rtx = NULL;
9831 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9832 easily extended if required. */
9833 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9835 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9837 /* Loop over the operands and check that the memory references are
9838 suitable (i.e. immediate offsets from the same base register). At
9839 the same time, extract the target register, and the memory
9841 for (i = 0; i < nops; i++)
9846 /* Convert a subreg of a mem into the mem itself. */
9847 if (GET_CODE (operands[nops + i]) == SUBREG)
9848 operands[nops + i] = alter_subreg (operands + (nops + i));
9850 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9852 /* Don't reorder volatile memory references; it doesn't seem worth
9853 looking for the case where the order is ok anyway. */
9854 if (MEM_VOLATILE_P (operands[nops + i]))
9857 offset = const0_rtx;
9859 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9860 || (GET_CODE (reg) == SUBREG
9861 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9862 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9863 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9865 || (GET_CODE (reg) == SUBREG
9866 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9867 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9870 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9871 ? operands[i] : SUBREG_REG (operands[i]));
9872 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9876 base_reg = REGNO (reg);
9878 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9881 else if (base_reg != (int) REGNO (reg))
9882 /* Not addressed from the same base register. */
9885 /* If it isn't an integer register, then we can't do this. */
9886 if (unsorted_regs[i] < 0
9887 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9888 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9889 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9890 || unsorted_regs[i] > 14)
9893 unsorted_offsets[i] = INTVAL (offset);
9894 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9898 /* Not a suitable memory address. */
9902 /* All the useful information has now been extracted from the
9903 operands into unsorted_regs and unsorted_offsets; additionally,
9904 order[0] has been set to the lowest offset in the list. Sort
9905 the offsets into order, verifying that they are adjacent, and
9906 check that the register numbers are ascending. */
9907 if (!compute_offset_order (nops, unsorted_offsets, order,
9908 check_regs ? unsorted_regs : NULL))
9912 memcpy (saved_order, order, sizeof order);
9918 for (i = 0; i < nops; i++)
9920 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9922 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9925 *load_offset = unsorted_offsets[order[0]];
9929 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9932 if (unsorted_offsets[order[0]] == 0)
9933 stm_case = 1; /* stmia */
9934 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9935 stm_case = 2; /* stmib */
9936 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9937 stm_case = 3; /* stmda */
9938 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9939 stm_case = 4; /* stmdb */
9943 if (!multiple_operation_profitable_p (false, nops, 0))
9949 /* Routines for use in generating RTL. */
9951 /* Generate a load-multiple instruction. COUNT is the number of loads in
9952 the instruction; REGS and MEMS are arrays containing the operands.
9953 BASEREG is the base register to be used in addressing the memory operands.
9954 WBACK_OFFSET is nonzero if the instruction should update the base
9958 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9959 HOST_WIDE_INT wback_offset)
9964 if (!multiple_operation_profitable_p (false, count, 0))
9970 for (i = 0; i < count; i++)
9971 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9973 if (wback_offset != 0)
9974 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9982 result = gen_rtx_PARALLEL (VOIDmode,
9983 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9984 if (wback_offset != 0)
9986 XVECEXP (result, 0, 0)
9987 = gen_rtx_SET (VOIDmode, basereg,
9988 plus_constant (basereg, wback_offset));
9993 for (j = 0; i < count; i++, j++)
9994 XVECEXP (result, 0, i)
9995 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10000 /* Generate a store-multiple instruction. COUNT is the number of stores in
10001 the instruction; REGS and MEMS are arrays containing the operands.
10002 BASEREG is the base register to be used in addressing the memory operands.
10003 WBACK_OFFSET is nonzero if the instruction should update the base
10007 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10008 HOST_WIDE_INT wback_offset)
10013 if (GET_CODE (basereg) == PLUS)
10014 basereg = XEXP (basereg, 0);
10016 if (!multiple_operation_profitable_p (false, count, 0))
10022 for (i = 0; i < count; i++)
10023 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10025 if (wback_offset != 0)
10026 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10028 seq = get_insns ();
10034 result = gen_rtx_PARALLEL (VOIDmode,
10035 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10036 if (wback_offset != 0)
10038 XVECEXP (result, 0, 0)
10039 = gen_rtx_SET (VOIDmode, basereg,
10040 plus_constant (basereg, wback_offset));
10045 for (j = 0; i < count; i++, j++)
10046 XVECEXP (result, 0, i)
10047 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10052 /* Generate either a load-multiple or a store-multiple instruction. This
10053 function can be used in situations where we can start with a single MEM
10054 rtx and adjust its address upwards.
10055 COUNT is the number of operations in the instruction, not counting a
10056 possible update of the base register. REGS is an array containing the
10058 BASEREG is the base register to be used in addressing the memory operands,
10059 which are constructed from BASEMEM.
10060 WRITE_BACK specifies whether the generated instruction should include an
10061 update of the base register.
10062 OFFSETP is used to pass an offset to and from this function; this offset
10063 is not used when constructing the address (instead BASEMEM should have an
10064 appropriate offset in its address), it is used only for setting
10065 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
10068 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10069 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10071 rtx mems[MAX_LDM_STM_OPS];
10072 HOST_WIDE_INT offset = *offsetp;
10075 gcc_assert (count <= MAX_LDM_STM_OPS);
10077 if (GET_CODE (basereg) == PLUS)
10078 basereg = XEXP (basereg, 0);
10080 for (i = 0; i < count; i++)
10082 rtx addr = plus_constant (basereg, i * 4);
10083 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10091 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10092 write_back ? 4 * count : 0);
10094 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10095 write_back ? 4 * count : 0);
10099 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10100 rtx basemem, HOST_WIDE_INT *offsetp)
10102 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10107 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10108 rtx basemem, HOST_WIDE_INT *offsetp)
10110 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10114 /* Called from a peephole2 expander to turn a sequence of loads into an
10115 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10116 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10117 is true if we can reorder the registers because they are used commutatively
10119 Returns true iff we could generate a new instruction. */
10122 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10124 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10125 rtx mems[MAX_LDM_STM_OPS];
10126 int i, j, base_reg;
10128 HOST_WIDE_INT offset;
10129 int write_back = FALSE;
10133 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10134 &base_reg, &offset, !sort_regs);
10140 for (i = 0; i < nops - 1; i++)
10141 for (j = i + 1; j < nops; j++)
10142 if (regs[i] > regs[j])
10148 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10152 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10153 gcc_assert (ldm_case == 1 || ldm_case == 5);
10159 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10160 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10162 if (!TARGET_THUMB1)
10164 base_reg = regs[0];
10165 base_reg_rtx = newbase;
10169 for (i = 0; i < nops; i++)
10171 addr = plus_constant (base_reg_rtx, offset + i * 4);
10172 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10175 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10176 write_back ? offset + i * 4 : 0));
10180 /* Called from a peephole2 expander to turn a sequence of stores into an
10181 STM instruction. OPERANDS are the operands found by the peephole matcher;
10182 NOPS indicates how many separate stores we are trying to combine.
10183 Returns true iff we could generate a new instruction. */
10186 gen_stm_seq (rtx *operands, int nops)
10189 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10190 rtx mems[MAX_LDM_STM_OPS];
10193 HOST_WIDE_INT offset;
10194 int write_back = FALSE;
10197 bool base_reg_dies;
10199 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10200 mem_order, &base_reg, &offset, true);
10205 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10207 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10210 gcc_assert (base_reg_dies);
10216 gcc_assert (base_reg_dies);
10217 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10221 addr = plus_constant (base_reg_rtx, offset);
10223 for (i = 0; i < nops; i++)
10225 addr = plus_constant (base_reg_rtx, offset + i * 4);
10226 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10229 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10230 write_back ? offset + i * 4 : 0));
10234 /* Called from a peephole2 expander to turn a sequence of stores that are
10235 preceded by constant loads into an STM instruction. OPERANDS are the
10236 operands found by the peephole matcher; NOPS indicates how many
10237 separate stores we are trying to combine; there are 2 * NOPS
10238 instructions in the peephole.
10239 Returns true iff we could generate a new instruction. */
10242 gen_const_stm_seq (rtx *operands, int nops)
10244 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10245 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10246 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10247 rtx mems[MAX_LDM_STM_OPS];
10250 HOST_WIDE_INT offset;
10251 int write_back = FALSE;
10254 bool base_reg_dies;
10256 HARD_REG_SET allocated;
10258 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10259 mem_order, &base_reg, &offset, false);
10264 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10266 /* If the same register is used more than once, try to find a free
10268 CLEAR_HARD_REG_SET (allocated);
10269 for (i = 0; i < nops; i++)
10271 for (j = i + 1; j < nops; j++)
10272 if (regs[i] == regs[j])
10274 rtx t = peep2_find_free_register (0, nops * 2,
10275 TARGET_THUMB1 ? "l" : "r",
10276 SImode, &allocated);
10280 regs[i] = REGNO (t);
10284 /* Compute an ordering that maps the register numbers to an ascending
10287 for (i = 0; i < nops; i++)
10288 if (regs[i] < regs[reg_order[0]])
10291 for (i = 1; i < nops; i++)
10293 int this_order = reg_order[i - 1];
10294 for (j = 0; j < nops; j++)
10295 if (regs[j] > regs[reg_order[i - 1]]
10296 && (this_order == reg_order[i - 1]
10297 || regs[j] < regs[this_order]))
10299 reg_order[i] = this_order;
10302 /* Ensure that registers that must be live after the instruction end
10303 up with the correct value. */
10304 for (i = 0; i < nops; i++)
10306 int this_order = reg_order[i];
10307 if ((this_order != mem_order[i]
10308 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10309 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10313 /* Load the constants. */
10314 for (i = 0; i < nops; i++)
10316 rtx op = operands[2 * nops + mem_order[i]];
10317 sorted_regs[i] = regs[reg_order[i]];
10318 emit_move_insn (reg_rtxs[reg_order[i]], op);
10321 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10323 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10326 gcc_assert (base_reg_dies);
10332 gcc_assert (base_reg_dies);
10333 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10337 addr = plus_constant (base_reg_rtx, offset);
10339 for (i = 0; i < nops; i++)
10341 addr = plus_constant (base_reg_rtx, offset + i * 4);
10342 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10345 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10346 write_back ? offset + i * 4 : 0));
10351 arm_gen_movmemqi (rtx *operands)
10353 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10354 HOST_WIDE_INT srcoffset, dstoffset;
10356 rtx src, dst, srcbase, dstbase;
10357 rtx part_bytes_reg = NULL;
10360 if (GET_CODE (operands[2]) != CONST_INT
10361 || GET_CODE (operands[3]) != CONST_INT
10362 || INTVAL (operands[2]) > 64
10363 || INTVAL (operands[3]) & 3)
10366 dstbase = operands[0];
10367 srcbase = operands[1];
10369 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10370 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10372 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10373 out_words_to_go = INTVAL (operands[2]) / 4;
10374 last_bytes = INTVAL (operands[2]) & 3;
10375 dstoffset = srcoffset = 0;
10377 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10378 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10380 for (i = 0; in_words_to_go >= 2; i+=4)
10382 if (in_words_to_go > 4)
10383 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10384 TRUE, srcbase, &srcoffset));
10386 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10387 src, FALSE, srcbase,
10390 if (out_words_to_go)
10392 if (out_words_to_go > 4)
10393 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10394 TRUE, dstbase, &dstoffset));
10395 else if (out_words_to_go != 1)
10396 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10397 out_words_to_go, dst,
10400 dstbase, &dstoffset));
10403 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10404 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10405 if (last_bytes != 0)
10407 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10413 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10414 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10417 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10418 if (out_words_to_go)
10422 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10423 sreg = copy_to_reg (mem);
10425 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10426 emit_move_insn (mem, sreg);
10429 gcc_assert (!in_words_to_go); /* Sanity check */
10432 if (in_words_to_go)
10434 gcc_assert (in_words_to_go > 0);
10436 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10437 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10440 gcc_assert (!last_bytes || part_bytes_reg);
10442 if (BYTES_BIG_ENDIAN && last_bytes)
10444 rtx tmp = gen_reg_rtx (SImode);
10446 /* The bytes we want are in the top end of the word. */
10447 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10448 GEN_INT (8 * (4 - last_bytes))));
10449 part_bytes_reg = tmp;
10453 mem = adjust_automodify_address (dstbase, QImode,
10454 plus_constant (dst, last_bytes - 1),
10455 dstoffset + last_bytes - 1);
10456 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10460 tmp = gen_reg_rtx (SImode);
10461 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10462 part_bytes_reg = tmp;
10469 if (last_bytes > 1)
10471 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10472 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10476 rtx tmp = gen_reg_rtx (SImode);
10477 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10478 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10479 part_bytes_reg = tmp;
10486 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10487 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10494 /* Select a dominance comparison mode if possible for a test of the general
10495 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10496 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10497 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10498 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10499 In all cases OP will be either EQ or NE, but we don't need to know which
10500 here. If we are unable to support a dominance comparison we return
10501 CC mode. This will then fail to match for the RTL expressions that
10502 generate this call. */
10504 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10506 enum rtx_code cond1, cond2;
10509 /* Currently we will probably get the wrong result if the individual
10510 comparisons are not simple. This also ensures that it is safe to
10511 reverse a comparison if necessary. */
10512 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10514 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10518 /* The if_then_else variant of this tests the second condition if the
10519 first passes, but is true if the first fails. Reverse the first
10520 condition to get a true "inclusive-or" expression. */
10521 if (cond_or == DOM_CC_NX_OR_Y)
10522 cond1 = reverse_condition (cond1);
10524 /* If the comparisons are not equal, and one doesn't dominate the other,
10525 then we can't do this. */
10527 && !comparison_dominates_p (cond1, cond2)
10528 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10533 enum rtx_code temp = cond1;
10541 if (cond_or == DOM_CC_X_AND_Y)
10546 case EQ: return CC_DEQmode;
10547 case LE: return CC_DLEmode;
10548 case LEU: return CC_DLEUmode;
10549 case GE: return CC_DGEmode;
10550 case GEU: return CC_DGEUmode;
10551 default: gcc_unreachable ();
10555 if (cond_or == DOM_CC_X_AND_Y)
10567 gcc_unreachable ();
10571 if (cond_or == DOM_CC_X_AND_Y)
10583 gcc_unreachable ();
10587 if (cond_or == DOM_CC_X_AND_Y)
10588 return CC_DLTUmode;
10593 return CC_DLTUmode;
10595 return CC_DLEUmode;
10599 gcc_unreachable ();
10603 if (cond_or == DOM_CC_X_AND_Y)
10604 return CC_DGTUmode;
10609 return CC_DGTUmode;
10611 return CC_DGEUmode;
10615 gcc_unreachable ();
10618 /* The remaining cases only occur when both comparisons are the
10621 gcc_assert (cond1 == cond2);
10625 gcc_assert (cond1 == cond2);
10629 gcc_assert (cond1 == cond2);
10633 gcc_assert (cond1 == cond2);
10634 return CC_DLEUmode;
10637 gcc_assert (cond1 == cond2);
10638 return CC_DGEUmode;
10641 gcc_unreachable ();
10646 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10648 /* All floating point compares return CCFP if it is an equality
10649 comparison, and CCFPE otherwise. */
10650 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10670 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10675 gcc_unreachable ();
10679 /* A compare with a shifted operand. Because of canonicalization, the
10680 comparison will have to be swapped when we emit the assembler. */
10681 if (GET_MODE (y) == SImode
10682 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10683 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10684 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10685 || GET_CODE (x) == ROTATERT))
10688 /* This operation is performed swapped, but since we only rely on the Z
10689 flag we don't need an additional mode. */
10690 if (GET_MODE (y) == SImode
10691 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10692 && GET_CODE (x) == NEG
10693 && (op == EQ || op == NE))
10696 /* This is a special case that is used by combine to allow a
10697 comparison of a shifted byte load to be split into a zero-extend
10698 followed by a comparison of the shifted integer (only valid for
10699 equalities and unsigned inequalities). */
10700 if (GET_MODE (x) == SImode
10701 && GET_CODE (x) == ASHIFT
10702 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10703 && GET_CODE (XEXP (x, 0)) == SUBREG
10704 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10705 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10706 && (op == EQ || op == NE
10707 || op == GEU || op == GTU || op == LTU || op == LEU)
10708 && GET_CODE (y) == CONST_INT)
10711 /* A construct for a conditional compare, if the false arm contains
10712 0, then both conditions must be true, otherwise either condition
10713 must be true. Not all conditions are possible, so CCmode is
10714 returned if it can't be done. */
10715 if (GET_CODE (x) == IF_THEN_ELSE
10716 && (XEXP (x, 2) == const0_rtx
10717 || XEXP (x, 2) == const1_rtx)
10718 && COMPARISON_P (XEXP (x, 0))
10719 && COMPARISON_P (XEXP (x, 1)))
10720 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10721 INTVAL (XEXP (x, 2)));
10723 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10724 if (GET_CODE (x) == AND
10725 && (op == EQ || op == NE)
10726 && COMPARISON_P (XEXP (x, 0))
10727 && COMPARISON_P (XEXP (x, 1)))
10728 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10731 if (GET_CODE (x) == IOR
10732 && (op == EQ || op == NE)
10733 && COMPARISON_P (XEXP (x, 0))
10734 && COMPARISON_P (XEXP (x, 1)))
10735 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10738 /* An operation (on Thumb) where we want to test for a single bit.
10739 This is done by shifting that bit up into the top bit of a
10740 scratch register; we can then branch on the sign bit. */
10742 && GET_MODE (x) == SImode
10743 && (op == EQ || op == NE)
10744 && GET_CODE (x) == ZERO_EXTRACT
10745 && XEXP (x, 1) == const1_rtx)
10748 /* An operation that sets the condition codes as a side-effect, the
10749 V flag is not set correctly, so we can only use comparisons where
10750 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10752 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10753 if (GET_MODE (x) == SImode
10755 && (op == EQ || op == NE || op == LT || op == GE)
10756 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10757 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10758 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10759 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10760 || GET_CODE (x) == LSHIFTRT
10761 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10762 || GET_CODE (x) == ROTATERT
10763 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10764 return CC_NOOVmode;
10766 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10769 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10770 && GET_CODE (x) == PLUS
10771 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10774 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10776 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10778 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10785 /* A DImode comparison against zero can be implemented by
10786 or'ing the two halves together. */
10787 if (y == const0_rtx)
10790 /* We can do an equality test in three Thumb instructions. */
10800 /* DImode unsigned comparisons can be implemented by cmp +
10801 cmpeq without a scratch register. Not worth doing in
10812 /* DImode signed and unsigned comparisons can be implemented
10813 by cmp + sbcs with a scratch register, but that does not
10814 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10815 gcc_assert (op != EQ && op != NE);
10819 gcc_unreachable ();
10826 /* X and Y are two things to compare using CODE. Emit the compare insn and
10827 return the rtx for register 0 in the proper mode. FP means this is a
10828 floating point compare: I don't think that it is needed on the arm. */
10830 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10832 enum machine_mode mode;
10834 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10836 /* We might have X as a constant, Y as a register because of the predicates
10837 used for cmpdi. If so, force X to a register here. */
10838 if (dimode_comparison && !REG_P (x))
10839 x = force_reg (DImode, x);
10841 mode = SELECT_CC_MODE (code, x, y);
10842 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10844 if (dimode_comparison
10845 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10846 && mode != CC_CZmode)
10850 /* To compare two non-zero values for equality, XOR them and
10851 then compare against zero. Not used for ARM mode; there
10852 CC_CZmode is cheaper. */
10853 if (mode == CC_Zmode && y != const0_rtx)
10855 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10858 /* A scratch register is required. */
10859 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10860 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10861 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10864 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10869 /* Generate a sequence of insns that will generate the correct return
10870 address mask depending on the physical architecture that the program
10873 arm_gen_return_addr_mask (void)
10875 rtx reg = gen_reg_rtx (Pmode);
10877 emit_insn (gen_return_addr_mask (reg));
10882 arm_reload_in_hi (rtx *operands)
10884 rtx ref = operands[1];
10886 HOST_WIDE_INT offset = 0;
10888 if (GET_CODE (ref) == SUBREG)
10890 offset = SUBREG_BYTE (ref);
10891 ref = SUBREG_REG (ref);
10894 if (GET_CODE (ref) == REG)
10896 /* We have a pseudo which has been spilt onto the stack; there
10897 are two cases here: the first where there is a simple
10898 stack-slot replacement and a second where the stack-slot is
10899 out of range, or is used as a subreg. */
10900 if (reg_equiv_mem (REGNO (ref)))
10902 ref = reg_equiv_mem (REGNO (ref));
10903 base = find_replacement (&XEXP (ref, 0));
10906 /* The slot is out of range, or was dressed up in a SUBREG. */
10907 base = reg_equiv_address (REGNO (ref));
10910 base = find_replacement (&XEXP (ref, 0));
10912 /* Handle the case where the address is too complex to be offset by 1. */
10913 if (GET_CODE (base) == MINUS
10914 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10916 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10918 emit_set_insn (base_plus, base);
10921 else if (GET_CODE (base) == PLUS)
10923 /* The addend must be CONST_INT, or we would have dealt with it above. */
10924 HOST_WIDE_INT hi, lo;
10926 offset += INTVAL (XEXP (base, 1));
10927 base = XEXP (base, 0);
10929 /* Rework the address into a legal sequence of insns. */
10930 /* Valid range for lo is -4095 -> 4095 */
10933 : -((-offset) & 0xfff));
10935 /* Corner case, if lo is the max offset then we would be out of range
10936 once we have added the additional 1 below, so bump the msb into the
10937 pre-loading insn(s). */
10941 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10942 ^ (HOST_WIDE_INT) 0x80000000)
10943 - (HOST_WIDE_INT) 0x80000000);
10945 gcc_assert (hi + lo == offset);
10949 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10951 /* Get the base address; addsi3 knows how to handle constants
10952 that require more than one insn. */
10953 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10959 /* Operands[2] may overlap operands[0] (though it won't overlap
10960 operands[1]), that's why we asked for a DImode reg -- so we can
10961 use the bit that does not overlap. */
10962 if (REGNO (operands[2]) == REGNO (operands[0]))
10963 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10965 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10967 emit_insn (gen_zero_extendqisi2 (scratch,
10968 gen_rtx_MEM (QImode,
10969 plus_constant (base,
10971 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10972 gen_rtx_MEM (QImode,
10973 plus_constant (base,
10975 if (!BYTES_BIG_ENDIAN)
10976 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10977 gen_rtx_IOR (SImode,
10980 gen_rtx_SUBREG (SImode, operands[0], 0),
10984 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10985 gen_rtx_IOR (SImode,
10986 gen_rtx_ASHIFT (SImode, scratch,
10988 gen_rtx_SUBREG (SImode, operands[0], 0)));
10991 /* Handle storing a half-word to memory during reload by synthesizing as two
10992 byte stores. Take care not to clobber the input values until after we
10993 have moved them somewhere safe. This code assumes that if the DImode
10994 scratch in operands[2] overlaps either the input value or output address
10995 in some way, then that value must die in this insn (we absolutely need
10996 two scratch registers for some corner cases). */
10998 arm_reload_out_hi (rtx *operands)
11000 rtx ref = operands[0];
11001 rtx outval = operands[1];
11003 HOST_WIDE_INT offset = 0;
11005 if (GET_CODE (ref) == SUBREG)
11007 offset = SUBREG_BYTE (ref);
11008 ref = SUBREG_REG (ref);
11011 if (GET_CODE (ref) == REG)
11013 /* We have a pseudo which has been spilt onto the stack; there
11014 are two cases here: the first where there is a simple
11015 stack-slot replacement and a second where the stack-slot is
11016 out of range, or is used as a subreg. */
11017 if (reg_equiv_mem (REGNO (ref)))
11019 ref = reg_equiv_mem (REGNO (ref));
11020 base = find_replacement (&XEXP (ref, 0));
11023 /* The slot is out of range, or was dressed up in a SUBREG. */
11024 base = reg_equiv_address (REGNO (ref));
11027 base = find_replacement (&XEXP (ref, 0));
11029 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11031 /* Handle the case where the address is too complex to be offset by 1. */
11032 if (GET_CODE (base) == MINUS
11033 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11035 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11037 /* Be careful not to destroy OUTVAL. */
11038 if (reg_overlap_mentioned_p (base_plus, outval))
11040 /* Updating base_plus might destroy outval, see if we can
11041 swap the scratch and base_plus. */
11042 if (!reg_overlap_mentioned_p (scratch, outval))
11045 scratch = base_plus;
11050 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11052 /* Be conservative and copy OUTVAL into the scratch now,
11053 this should only be necessary if outval is a subreg
11054 of something larger than a word. */
11055 /* XXX Might this clobber base? I can't see how it can,
11056 since scratch is known to overlap with OUTVAL, and
11057 must be wider than a word. */
11058 emit_insn (gen_movhi (scratch_hi, outval));
11059 outval = scratch_hi;
11063 emit_set_insn (base_plus, base);
11066 else if (GET_CODE (base) == PLUS)
11068 /* The addend must be CONST_INT, or we would have dealt with it above. */
11069 HOST_WIDE_INT hi, lo;
11071 offset += INTVAL (XEXP (base, 1));
11072 base = XEXP (base, 0);
11074 /* Rework the address into a legal sequence of insns. */
11075 /* Valid range for lo is -4095 -> 4095 */
11078 : -((-offset) & 0xfff));
11080 /* Corner case, if lo is the max offset then we would be out of range
11081 once we have added the additional 1 below, so bump the msb into the
11082 pre-loading insn(s). */
11086 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11087 ^ (HOST_WIDE_INT) 0x80000000)
11088 - (HOST_WIDE_INT) 0x80000000);
11090 gcc_assert (hi + lo == offset);
11094 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11096 /* Be careful not to destroy OUTVAL. */
11097 if (reg_overlap_mentioned_p (base_plus, outval))
11099 /* Updating base_plus might destroy outval, see if we
11100 can swap the scratch and base_plus. */
11101 if (!reg_overlap_mentioned_p (scratch, outval))
11104 scratch = base_plus;
11109 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11111 /* Be conservative and copy outval into scratch now,
11112 this should only be necessary if outval is a
11113 subreg of something larger than a word. */
11114 /* XXX Might this clobber base? I can't see how it
11115 can, since scratch is known to overlap with
11117 emit_insn (gen_movhi (scratch_hi, outval));
11118 outval = scratch_hi;
11122 /* Get the base address; addsi3 knows how to handle constants
11123 that require more than one insn. */
11124 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11130 if (BYTES_BIG_ENDIAN)
11132 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11133 plus_constant (base, offset + 1)),
11134 gen_lowpart (QImode, outval)));
11135 emit_insn (gen_lshrsi3 (scratch,
11136 gen_rtx_SUBREG (SImode, outval, 0),
11138 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11139 gen_lowpart (QImode, scratch)));
11143 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11144 gen_lowpart (QImode, outval)));
11145 emit_insn (gen_lshrsi3 (scratch,
11146 gen_rtx_SUBREG (SImode, outval, 0),
11148 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11149 plus_constant (base, offset + 1)),
11150 gen_lowpart (QImode, scratch)));
11154 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11155 (padded to the size of a word) should be passed in a register. */
11158 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11160 if (TARGET_AAPCS_BASED)
11161 return must_pass_in_stack_var_size (mode, type);
11163 return must_pass_in_stack_var_size_or_pad (mode, type);
11167 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11168 Return true if an argument passed on the stack should be padded upwards,
11169 i.e. if the least-significant byte has useful data.
11170 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11171 aggregate types are placed in the lowest memory address. */
11174 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11176 if (!TARGET_AAPCS_BASED)
11177 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11179 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11186 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11187 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11188 byte of the register has useful data, and return the opposite if the
11189 most significant byte does.
11190 For AAPCS, small aggregates and small complex types are always padded
11194 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11195 tree type, int first ATTRIBUTE_UNUSED)
11197 if (TARGET_AAPCS_BASED
11198 && BYTES_BIG_ENDIAN
11199 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11200 && int_size_in_bytes (type) <= 4)
11203 /* Otherwise, use default padding. */
11204 return !BYTES_BIG_ENDIAN;
11208 /* Print a symbolic form of X to the debug file, F. */
11210 arm_print_value (FILE *f, rtx x)
11212 switch (GET_CODE (x))
11215 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11219 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11227 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11229 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11230 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11238 fprintf (f, "\"%s\"", XSTR (x, 0));
11242 fprintf (f, "`%s'", XSTR (x, 0));
11246 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11250 arm_print_value (f, XEXP (x, 0));
11254 arm_print_value (f, XEXP (x, 0));
11256 arm_print_value (f, XEXP (x, 1));
11264 fprintf (f, "????");
11269 /* Routines for manipulation of the constant pool. */
11271 /* Arm instructions cannot load a large constant directly into a
11272 register; they have to come from a pc relative load. The constant
11273 must therefore be placed in the addressable range of the pc
11274 relative load. Depending on the precise pc relative load
11275 instruction the range is somewhere between 256 bytes and 4k. This
11276 means that we often have to dump a constant inside a function, and
11277 generate code to branch around it.
11279 It is important to minimize this, since the branches will slow
11280 things down and make the code larger.
11282 Normally we can hide the table after an existing unconditional
11283 branch so that there is no interruption of the flow, but in the
11284 worst case the code looks like this:
11302 We fix this by performing a scan after scheduling, which notices
11303 which instructions need to have their operands fetched from the
11304 constant table and builds the table.
11306 The algorithm starts by building a table of all the constants that
11307 need fixing up and all the natural barriers in the function (places
11308 where a constant table can be dropped without breaking the flow).
11309 For each fixup we note how far the pc-relative replacement will be
11310 able to reach and the offset of the instruction into the function.
11312 Having built the table we then group the fixes together to form
11313 tables that are as large as possible (subject to addressing
11314 constraints) and emit each table of constants after the last
11315 barrier that is within range of all the instructions in the group.
11316 If a group does not contain a barrier, then we forcibly create one
11317 by inserting a jump instruction into the flow. Once the table has
11318 been inserted, the insns are then modified to reference the
11319 relevant entry in the pool.
11321 Possible enhancements to the algorithm (not implemented) are:
11323 1) For some processors and object formats, there may be benefit in
11324 aligning the pools to the start of cache lines; this alignment
11325 would need to be taken into account when calculating addressability
11328 /* These typedefs are located at the start of this file, so that
11329 they can be used in the prototypes there. This comment is to
11330 remind readers of that fact so that the following structures
11331 can be understood more easily.
11333 typedef struct minipool_node Mnode;
11334 typedef struct minipool_fixup Mfix; */
11336 struct minipool_node
11338 /* Doubly linked chain of entries. */
11341 /* The maximum offset into the code that this entry can be placed. While
11342 pushing fixes for forward references, all entries are sorted in order
11343 of increasing max_address. */
11344 HOST_WIDE_INT max_address;
11345 /* Similarly for an entry inserted for a backwards ref. */
11346 HOST_WIDE_INT min_address;
11347 /* The number of fixes referencing this entry. This can become zero
11348 if we "unpush" an entry. In this case we ignore the entry when we
11349 come to emit the code. */
11351 /* The offset from the start of the minipool. */
11352 HOST_WIDE_INT offset;
11353 /* The value in table. */
11355 /* The mode of value. */
11356 enum machine_mode mode;
11357 /* The size of the value. With iWMMXt enabled
11358 sizes > 4 also imply an alignment of 8-bytes. */
11362 struct minipool_fixup
11366 HOST_WIDE_INT address;
11368 enum machine_mode mode;
11372 HOST_WIDE_INT forwards;
11373 HOST_WIDE_INT backwards;
11376 /* Fixes less than a word need padding out to a word boundary. */
11377 #define MINIPOOL_FIX_SIZE(mode) \
11378 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11380 static Mnode * minipool_vector_head;
11381 static Mnode * minipool_vector_tail;
11382 static rtx minipool_vector_label;
11383 static int minipool_pad;
11385 /* The linked list of all minipool fixes required for this function. */
11386 Mfix * minipool_fix_head;
11387 Mfix * minipool_fix_tail;
11388 /* The fix entry for the current minipool, once it has been placed. */
11389 Mfix * minipool_barrier;
11391 /* Determines if INSN is the start of a jump table. Returns the end
11392 of the TABLE or NULL_RTX. */
11394 is_jump_table (rtx insn)
11398 if (GET_CODE (insn) == JUMP_INSN
11399 && JUMP_LABEL (insn) != NULL
11400 && ((table = next_real_insn (JUMP_LABEL (insn)))
11401 == next_real_insn (insn))
11403 && GET_CODE (table) == JUMP_INSN
11404 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11405 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11411 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11412 #define JUMP_TABLES_IN_TEXT_SECTION 0
11415 static HOST_WIDE_INT
11416 get_jump_table_size (rtx insn)
11418 /* ADDR_VECs only take room if read-only data does into the text
11420 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11422 rtx body = PATTERN (insn);
11423 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11424 HOST_WIDE_INT size;
11425 HOST_WIDE_INT modesize;
11427 modesize = GET_MODE_SIZE (GET_MODE (body));
11428 size = modesize * XVECLEN (body, elt);
11432 /* Round up size of TBB table to a halfword boundary. */
11433 size = (size + 1) & ~(HOST_WIDE_INT)1;
11436 /* No padding necessary for TBH. */
11439 /* Add two bytes for alignment on Thumb. */
11444 gcc_unreachable ();
11452 /* Move a minipool fix MP from its current location to before MAX_MP.
11453 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11454 constraints may need updating. */
11456 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11457 HOST_WIDE_INT max_address)
11459 /* The code below assumes these are different. */
11460 gcc_assert (mp != max_mp);
11462 if (max_mp == NULL)
11464 if (max_address < mp->max_address)
11465 mp->max_address = max_address;
11469 if (max_address > max_mp->max_address - mp->fix_size)
11470 mp->max_address = max_mp->max_address - mp->fix_size;
11472 mp->max_address = max_address;
11474 /* Unlink MP from its current position. Since max_mp is non-null,
11475 mp->prev must be non-null. */
11476 mp->prev->next = mp->next;
11477 if (mp->next != NULL)
11478 mp->next->prev = mp->prev;
11480 minipool_vector_tail = mp->prev;
11482 /* Re-insert it before MAX_MP. */
11484 mp->prev = max_mp->prev;
11487 if (mp->prev != NULL)
11488 mp->prev->next = mp;
11490 minipool_vector_head = mp;
11493 /* Save the new entry. */
11496 /* Scan over the preceding entries and adjust their addresses as
11498 while (mp->prev != NULL
11499 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11501 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11508 /* Add a constant to the minipool for a forward reference. Returns the
11509 node added or NULL if the constant will not fit in this pool. */
11511 add_minipool_forward_ref (Mfix *fix)
11513 /* If set, max_mp is the first pool_entry that has a lower
11514 constraint than the one we are trying to add. */
11515 Mnode * max_mp = NULL;
11516 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11519 /* If the minipool starts before the end of FIX->INSN then this FIX
11520 can not be placed into the current pool. Furthermore, adding the
11521 new constant pool entry may cause the pool to start FIX_SIZE bytes
11523 if (minipool_vector_head &&
11524 (fix->address + get_attr_length (fix->insn)
11525 >= minipool_vector_head->max_address - fix->fix_size))
11528 /* Scan the pool to see if a constant with the same value has
11529 already been added. While we are doing this, also note the
11530 location where we must insert the constant if it doesn't already
11532 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11534 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11535 && fix->mode == mp->mode
11536 && (GET_CODE (fix->value) != CODE_LABEL
11537 || (CODE_LABEL_NUMBER (fix->value)
11538 == CODE_LABEL_NUMBER (mp->value)))
11539 && rtx_equal_p (fix->value, mp->value))
11541 /* More than one fix references this entry. */
11543 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11546 /* Note the insertion point if necessary. */
11548 && mp->max_address > max_address)
11551 /* If we are inserting an 8-bytes aligned quantity and
11552 we have not already found an insertion point, then
11553 make sure that all such 8-byte aligned quantities are
11554 placed at the start of the pool. */
11555 if (ARM_DOUBLEWORD_ALIGN
11557 && fix->fix_size >= 8
11558 && mp->fix_size < 8)
11561 max_address = mp->max_address;
11565 /* The value is not currently in the minipool, so we need to create
11566 a new entry for it. If MAX_MP is NULL, the entry will be put on
11567 the end of the list since the placement is less constrained than
11568 any existing entry. Otherwise, we insert the new fix before
11569 MAX_MP and, if necessary, adjust the constraints on the other
11572 mp->fix_size = fix->fix_size;
11573 mp->mode = fix->mode;
11574 mp->value = fix->value;
11576 /* Not yet required for a backwards ref. */
11577 mp->min_address = -65536;
11579 if (max_mp == NULL)
11581 mp->max_address = max_address;
11583 mp->prev = minipool_vector_tail;
11585 if (mp->prev == NULL)
11587 minipool_vector_head = mp;
11588 minipool_vector_label = gen_label_rtx ();
11591 mp->prev->next = mp;
11593 minipool_vector_tail = mp;
11597 if (max_address > max_mp->max_address - mp->fix_size)
11598 mp->max_address = max_mp->max_address - mp->fix_size;
11600 mp->max_address = max_address;
11603 mp->prev = max_mp->prev;
11605 if (mp->prev != NULL)
11606 mp->prev->next = mp;
11608 minipool_vector_head = mp;
11611 /* Save the new entry. */
11614 /* Scan over the preceding entries and adjust their addresses as
11616 while (mp->prev != NULL
11617 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11619 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11627 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11628 HOST_WIDE_INT min_address)
11630 HOST_WIDE_INT offset;
11632 /* The code below assumes these are different. */
11633 gcc_assert (mp != min_mp);
11635 if (min_mp == NULL)
11637 if (min_address > mp->min_address)
11638 mp->min_address = min_address;
11642 /* We will adjust this below if it is too loose. */
11643 mp->min_address = min_address;
11645 /* Unlink MP from its current position. Since min_mp is non-null,
11646 mp->next must be non-null. */
11647 mp->next->prev = mp->prev;
11648 if (mp->prev != NULL)
11649 mp->prev->next = mp->next;
11651 minipool_vector_head = mp->next;
11653 /* Reinsert it after MIN_MP. */
11655 mp->next = min_mp->next;
11657 if (mp->next != NULL)
11658 mp->next->prev = mp;
11660 minipool_vector_tail = mp;
11666 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11668 mp->offset = offset;
11669 if (mp->refcount > 0)
11670 offset += mp->fix_size;
11672 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11673 mp->next->min_address = mp->min_address + mp->fix_size;
11679 /* Add a constant to the minipool for a backward reference. Returns the
11680 node added or NULL if the constant will not fit in this pool.
11682 Note that the code for insertion for a backwards reference can be
11683 somewhat confusing because the calculated offsets for each fix do
11684 not take into account the size of the pool (which is still under
11687 add_minipool_backward_ref (Mfix *fix)
11689 /* If set, min_mp is the last pool_entry that has a lower constraint
11690 than the one we are trying to add. */
11691 Mnode *min_mp = NULL;
11692 /* This can be negative, since it is only a constraint. */
11693 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11696 /* If we can't reach the current pool from this insn, or if we can't
11697 insert this entry at the end of the pool without pushing other
11698 fixes out of range, then we don't try. This ensures that we
11699 can't fail later on. */
11700 if (min_address >= minipool_barrier->address
11701 || (minipool_vector_tail->min_address + fix->fix_size
11702 >= minipool_barrier->address))
11705 /* Scan the pool to see if a constant with the same value has
11706 already been added. While we are doing this, also note the
11707 location where we must insert the constant if it doesn't already
11709 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11711 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11712 && fix->mode == mp->mode
11713 && (GET_CODE (fix->value) != CODE_LABEL
11714 || (CODE_LABEL_NUMBER (fix->value)
11715 == CODE_LABEL_NUMBER (mp->value)))
11716 && rtx_equal_p (fix->value, mp->value)
11717 /* Check that there is enough slack to move this entry to the
11718 end of the table (this is conservative). */
11719 && (mp->max_address
11720 > (minipool_barrier->address
11721 + minipool_vector_tail->offset
11722 + minipool_vector_tail->fix_size)))
11725 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11728 if (min_mp != NULL)
11729 mp->min_address += fix->fix_size;
11732 /* Note the insertion point if necessary. */
11733 if (mp->min_address < min_address)
11735 /* For now, we do not allow the insertion of 8-byte alignment
11736 requiring nodes anywhere but at the start of the pool. */
11737 if (ARM_DOUBLEWORD_ALIGN
11738 && fix->fix_size >= 8 && mp->fix_size < 8)
11743 else if (mp->max_address
11744 < minipool_barrier->address + mp->offset + fix->fix_size)
11746 /* Inserting before this entry would push the fix beyond
11747 its maximum address (which can happen if we have
11748 re-located a forwards fix); force the new fix to come
11750 if (ARM_DOUBLEWORD_ALIGN
11751 && fix->fix_size >= 8 && mp->fix_size < 8)
11756 min_address = mp->min_address + fix->fix_size;
11759 /* Do not insert a non-8-byte aligned quantity before 8-byte
11760 aligned quantities. */
11761 else if (ARM_DOUBLEWORD_ALIGN
11762 && fix->fix_size < 8
11763 && mp->fix_size >= 8)
11766 min_address = mp->min_address + fix->fix_size;
11771 /* We need to create a new entry. */
11773 mp->fix_size = fix->fix_size;
11774 mp->mode = fix->mode;
11775 mp->value = fix->value;
11777 mp->max_address = minipool_barrier->address + 65536;
11779 mp->min_address = min_address;
11781 if (min_mp == NULL)
11784 mp->next = minipool_vector_head;
11786 if (mp->next == NULL)
11788 minipool_vector_tail = mp;
11789 minipool_vector_label = gen_label_rtx ();
11792 mp->next->prev = mp;
11794 minipool_vector_head = mp;
11798 mp->next = min_mp->next;
11802 if (mp->next != NULL)
11803 mp->next->prev = mp;
11805 minipool_vector_tail = mp;
11808 /* Save the new entry. */
11816 /* Scan over the following entries and adjust their offsets. */
11817 while (mp->next != NULL)
11819 if (mp->next->min_address < mp->min_address + mp->fix_size)
11820 mp->next->min_address = mp->min_address + mp->fix_size;
11823 mp->next->offset = mp->offset + mp->fix_size;
11825 mp->next->offset = mp->offset;
11834 assign_minipool_offsets (Mfix *barrier)
11836 HOST_WIDE_INT offset = 0;
11839 minipool_barrier = barrier;
11841 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11843 mp->offset = offset;
11845 if (mp->refcount > 0)
11846 offset += mp->fix_size;
11850 /* Output the literal table */
11852 dump_minipool (rtx scan)
11858 if (ARM_DOUBLEWORD_ALIGN)
11859 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11860 if (mp->refcount > 0 && mp->fix_size >= 8)
11867 fprintf (dump_file,
11868 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11869 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11871 scan = emit_label_after (gen_label_rtx (), scan);
11872 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11873 scan = emit_label_after (minipool_vector_label, scan);
11875 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11877 if (mp->refcount > 0)
11881 fprintf (dump_file,
11882 ";; Offset %u, min %ld, max %ld ",
11883 (unsigned) mp->offset, (unsigned long) mp->min_address,
11884 (unsigned long) mp->max_address);
11885 arm_print_value (dump_file, mp->value);
11886 fputc ('\n', dump_file);
11889 switch (mp->fix_size)
11891 #ifdef HAVE_consttable_1
11893 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11897 #ifdef HAVE_consttable_2
11899 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11903 #ifdef HAVE_consttable_4
11905 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11909 #ifdef HAVE_consttable_8
11911 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11915 #ifdef HAVE_consttable_16
11917 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11922 gcc_unreachable ();
11930 minipool_vector_head = minipool_vector_tail = NULL;
11931 scan = emit_insn_after (gen_consttable_end (), scan);
11932 scan = emit_barrier_after (scan);
11935 /* Return the cost of forcibly inserting a barrier after INSN. */
11937 arm_barrier_cost (rtx insn)
11939 /* Basing the location of the pool on the loop depth is preferable,
11940 but at the moment, the basic block information seems to be
11941 corrupt by this stage of the compilation. */
11942 int base_cost = 50;
11943 rtx next = next_nonnote_insn (insn);
11945 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11948 switch (GET_CODE (insn))
11951 /* It will always be better to place the table before the label, rather
11960 return base_cost - 10;
11963 return base_cost + 10;
11967 /* Find the best place in the insn stream in the range
11968 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11969 Create the barrier by inserting a jump and add a new fix entry for
11972 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11974 HOST_WIDE_INT count = 0;
11976 rtx from = fix->insn;
11977 /* The instruction after which we will insert the jump. */
11978 rtx selected = NULL;
11980 /* The address at which the jump instruction will be placed. */
11981 HOST_WIDE_INT selected_address;
11983 HOST_WIDE_INT max_count = max_address - fix->address;
11984 rtx label = gen_label_rtx ();
11986 selected_cost = arm_barrier_cost (from);
11987 selected_address = fix->address;
11989 while (from && count < max_count)
11994 /* This code shouldn't have been called if there was a natural barrier
11996 gcc_assert (GET_CODE (from) != BARRIER);
11998 /* Count the length of this insn. */
11999 count += get_attr_length (from);
12001 /* If there is a jump table, add its length. */
12002 tmp = is_jump_table (from);
12005 count += get_jump_table_size (tmp);
12007 /* Jump tables aren't in a basic block, so base the cost on
12008 the dispatch insn. If we select this location, we will
12009 still put the pool after the table. */
12010 new_cost = arm_barrier_cost (from);
12012 if (count < max_count
12013 && (!selected || new_cost <= selected_cost))
12016 selected_cost = new_cost;
12017 selected_address = fix->address + count;
12020 /* Continue after the dispatch table. */
12021 from = NEXT_INSN (tmp);
12025 new_cost = arm_barrier_cost (from);
12027 if (count < max_count
12028 && (!selected || new_cost <= selected_cost))
12031 selected_cost = new_cost;
12032 selected_address = fix->address + count;
12035 from = NEXT_INSN (from);
12038 /* Make sure that we found a place to insert the jump. */
12039 gcc_assert (selected);
12041 /* Make sure we do not split a call and its corresponding
12042 CALL_ARG_LOCATION note. */
12043 if (CALL_P (selected))
12045 rtx next = NEXT_INSN (selected);
12046 if (next && NOTE_P (next)
12047 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12051 /* Create a new JUMP_INSN that branches around a barrier. */
12052 from = emit_jump_insn_after (gen_jump (label), selected);
12053 JUMP_LABEL (from) = label;
12054 barrier = emit_barrier_after (from);
12055 emit_label_after (label, barrier);
12057 /* Create a minipool barrier entry for the new barrier. */
12058 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12059 new_fix->insn = barrier;
12060 new_fix->address = selected_address;
12061 new_fix->next = fix->next;
12062 fix->next = new_fix;
12067 /* Record that there is a natural barrier in the insn stream at
12070 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12072 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12075 fix->address = address;
12078 if (minipool_fix_head != NULL)
12079 minipool_fix_tail->next = fix;
12081 minipool_fix_head = fix;
12083 minipool_fix_tail = fix;
12086 /* Record INSN, which will need fixing up to load a value from the
12087 minipool. ADDRESS is the offset of the insn since the start of the
12088 function; LOC is a pointer to the part of the insn which requires
12089 fixing; VALUE is the constant that must be loaded, which is of type
12092 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12093 enum machine_mode mode, rtx value)
12095 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12098 fix->address = address;
12101 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12102 fix->value = value;
12103 fix->forwards = get_attr_pool_range (insn);
12104 fix->backwards = get_attr_neg_pool_range (insn);
12105 fix->minipool = NULL;
12107 /* If an insn doesn't have a range defined for it, then it isn't
12108 expecting to be reworked by this code. Better to stop now than
12109 to generate duff assembly code. */
12110 gcc_assert (fix->forwards || fix->backwards);
12112 /* If an entry requires 8-byte alignment then assume all constant pools
12113 require 4 bytes of padding. Trying to do this later on a per-pool
12114 basis is awkward because existing pool entries have to be modified. */
12115 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12120 fprintf (dump_file,
12121 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12122 GET_MODE_NAME (mode),
12123 INSN_UID (insn), (unsigned long) address,
12124 -1 * (long)fix->backwards, (long)fix->forwards);
12125 arm_print_value (dump_file, fix->value);
12126 fprintf (dump_file, "\n");
12129 /* Add it to the chain of fixes. */
12132 if (minipool_fix_head != NULL)
12133 minipool_fix_tail->next = fix;
12135 minipool_fix_head = fix;
12137 minipool_fix_tail = fix;
12140 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12141 Returns the number of insns needed, or 99 if we don't know how to
12144 arm_const_double_inline_cost (rtx val)
12146 rtx lowpart, highpart;
12147 enum machine_mode mode;
12149 mode = GET_MODE (val);
12151 if (mode == VOIDmode)
12154 gcc_assert (GET_MODE_SIZE (mode) == 8);
12156 lowpart = gen_lowpart (SImode, val);
12157 highpart = gen_highpart_mode (SImode, mode, val);
12159 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12160 gcc_assert (GET_CODE (highpart) == CONST_INT);
12162 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12163 NULL_RTX, NULL_RTX, 0, 0)
12164 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12165 NULL_RTX, NULL_RTX, 0, 0));
12168 /* Return true if it is worthwhile to split a 64-bit constant into two
12169 32-bit operations. This is the case if optimizing for size, or
12170 if we have load delay slots, or if one 32-bit part can be done with
12171 a single data operation. */
12173 arm_const_double_by_parts (rtx val)
12175 enum machine_mode mode = GET_MODE (val);
12178 if (optimize_size || arm_ld_sched)
12181 if (mode == VOIDmode)
12184 part = gen_highpart_mode (SImode, mode, val);
12186 gcc_assert (GET_CODE (part) == CONST_INT);
12188 if (const_ok_for_arm (INTVAL (part))
12189 || const_ok_for_arm (~INTVAL (part)))
12192 part = gen_lowpart (SImode, val);
12194 gcc_assert (GET_CODE (part) == CONST_INT);
12196 if (const_ok_for_arm (INTVAL (part))
12197 || const_ok_for_arm (~INTVAL (part)))
12203 /* Return true if it is possible to inline both the high and low parts
12204 of a 64-bit constant into 32-bit data processing instructions. */
12206 arm_const_double_by_immediates (rtx val)
12208 enum machine_mode mode = GET_MODE (val);
12211 if (mode == VOIDmode)
12214 part = gen_highpart_mode (SImode, mode, val);
12216 gcc_assert (GET_CODE (part) == CONST_INT);
12218 if (!const_ok_for_arm (INTVAL (part)))
12221 part = gen_lowpart (SImode, val);
12223 gcc_assert (GET_CODE (part) == CONST_INT);
12225 if (!const_ok_for_arm (INTVAL (part)))
12231 /* Scan INSN and note any of its operands that need fixing.
12232 If DO_PUSHES is false we do not actually push any of the fixups
12233 needed. The function returns TRUE if any fixups were needed/pushed.
12234 This is used by arm_memory_load_p() which needs to know about loads
12235 of constants that will be converted into minipool loads. */
12237 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12239 bool result = false;
12242 extract_insn (insn);
12244 if (!constrain_operands (1))
12245 fatal_insn_not_found (insn);
12247 if (recog_data.n_alternatives == 0)
12250 /* Fill in recog_op_alt with information about the constraints of
12252 preprocess_constraints ();
12254 for (opno = 0; opno < recog_data.n_operands; opno++)
12256 /* Things we need to fix can only occur in inputs. */
12257 if (recog_data.operand_type[opno] != OP_IN)
12260 /* If this alternative is a memory reference, then any mention
12261 of constants in this alternative is really to fool reload
12262 into allowing us to accept one there. We need to fix them up
12263 now so that we output the right code. */
12264 if (recog_op_alt[opno][which_alternative].memory_ok)
12266 rtx op = recog_data.operand[opno];
12268 if (CONSTANT_P (op))
12271 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12272 recog_data.operand_mode[opno], op);
12275 else if (GET_CODE (op) == MEM
12276 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12277 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12281 rtx cop = avoid_constant_pool_reference (op);
12283 /* Casting the address of something to a mode narrower
12284 than a word can cause avoid_constant_pool_reference()
12285 to return the pool reference itself. That's no good to
12286 us here. Lets just hope that we can use the
12287 constant pool value directly. */
12289 cop = get_pool_constant (XEXP (op, 0));
12291 push_minipool_fix (insn, address,
12292 recog_data.operand_loc[opno],
12293 recog_data.operand_mode[opno], cop);
12304 /* Convert instructions to their cc-clobbering variant if possible, since
12305 that allows us to use smaller encodings. */
12308 thumb2_reorg (void)
12313 INIT_REG_SET (&live);
12315 /* We are freeing block_for_insn in the toplev to keep compatibility
12316 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12317 compute_bb_for_insn ();
12324 COPY_REG_SET (&live, DF_LR_OUT (bb));
12325 df_simulate_initialize_backwards (bb, &live);
12326 FOR_BB_INSNS_REVERSE (bb, insn)
12328 if (NONJUMP_INSN_P (insn)
12329 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12331 rtx pat = PATTERN (insn);
12332 if (GET_CODE (pat) == SET
12333 && low_register_operand (XEXP (pat, 0), SImode)
12334 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12335 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12336 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12338 rtx dst = XEXP (pat, 0);
12339 rtx src = XEXP (pat, 1);
12340 rtx op0 = XEXP (src, 0);
12341 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12342 ? XEXP (src, 1) : NULL);
12344 if (rtx_equal_p (dst, op0)
12345 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12347 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12348 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12349 rtvec vec = gen_rtvec (2, pat, clobber);
12351 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12352 INSN_CODE (insn) = -1;
12354 /* We can also handle a commutative operation where the
12355 second operand matches the destination. */
12356 else if (op1 && rtx_equal_p (dst, op1))
12358 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12359 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12362 src = copy_rtx (src);
12363 XEXP (src, 0) = op1;
12364 XEXP (src, 1) = op0;
12365 pat = gen_rtx_SET (VOIDmode, dst, src);
12366 vec = gen_rtvec (2, pat, clobber);
12367 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12368 INSN_CODE (insn) = -1;
12373 if (NONDEBUG_INSN_P (insn))
12374 df_simulate_one_insn_backwards (bb, insn, &live);
12378 CLEAR_REG_SET (&live);
12381 /* Gcc puts the pool in the wrong place for ARM, since we can only
12382 load addresses a limited distance around the pc. We do some
12383 special munging to move the constant pool values to the correct
12384 point in the code. */
12389 HOST_WIDE_INT address = 0;
12395 minipool_fix_head = minipool_fix_tail = NULL;
12397 /* The first insn must always be a note, or the code below won't
12398 scan it properly. */
12399 insn = get_insns ();
12400 gcc_assert (GET_CODE (insn) == NOTE);
12403 /* Scan all the insns and record the operands that will need fixing. */
12404 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12406 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12407 && (arm_cirrus_insn_p (insn)
12408 || GET_CODE (insn) == JUMP_INSN
12409 || arm_memory_load_p (insn)))
12410 cirrus_reorg (insn);
12412 if (GET_CODE (insn) == BARRIER)
12413 push_minipool_barrier (insn, address);
12414 else if (INSN_P (insn))
12418 note_invalid_constants (insn, address, true);
12419 address += get_attr_length (insn);
12421 /* If the insn is a vector jump, add the size of the table
12422 and skip the table. */
12423 if ((table = is_jump_table (insn)) != NULL)
12425 address += get_jump_table_size (table);
12431 fix = minipool_fix_head;
12433 /* Now scan the fixups and perform the required changes. */
12438 Mfix * last_added_fix;
12439 Mfix * last_barrier = NULL;
12442 /* Skip any further barriers before the next fix. */
12443 while (fix && GET_CODE (fix->insn) == BARRIER)
12446 /* No more fixes. */
12450 last_added_fix = NULL;
12452 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12454 if (GET_CODE (ftmp->insn) == BARRIER)
12456 if (ftmp->address >= minipool_vector_head->max_address)
12459 last_barrier = ftmp;
12461 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12464 last_added_fix = ftmp; /* Keep track of the last fix added. */
12467 /* If we found a barrier, drop back to that; any fixes that we
12468 could have reached but come after the barrier will now go in
12469 the next mini-pool. */
12470 if (last_barrier != NULL)
12472 /* Reduce the refcount for those fixes that won't go into this
12474 for (fdel = last_barrier->next;
12475 fdel && fdel != ftmp;
12478 fdel->minipool->refcount--;
12479 fdel->minipool = NULL;
12482 ftmp = last_barrier;
12486 /* ftmp is first fix that we can't fit into this pool and
12487 there no natural barriers that we could use. Insert a
12488 new barrier in the code somewhere between the previous
12489 fix and this one, and arrange to jump around it. */
12490 HOST_WIDE_INT max_address;
12492 /* The last item on the list of fixes must be a barrier, so
12493 we can never run off the end of the list of fixes without
12494 last_barrier being set. */
12497 max_address = minipool_vector_head->max_address;
12498 /* Check that there isn't another fix that is in range that
12499 we couldn't fit into this pool because the pool was
12500 already too large: we need to put the pool before such an
12501 instruction. The pool itself may come just after the
12502 fix because create_fix_barrier also allows space for a
12503 jump instruction. */
12504 if (ftmp->address < max_address)
12505 max_address = ftmp->address + 1;
12507 last_barrier = create_fix_barrier (last_added_fix, max_address);
12510 assign_minipool_offsets (last_barrier);
12514 if (GET_CODE (ftmp->insn) != BARRIER
12515 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12522 /* Scan over the fixes we have identified for this pool, fixing them
12523 up and adding the constants to the pool itself. */
12524 for (this_fix = fix; this_fix && ftmp != this_fix;
12525 this_fix = this_fix->next)
12526 if (GET_CODE (this_fix->insn) != BARRIER)
12529 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12530 minipool_vector_label),
12531 this_fix->minipool->offset);
12532 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12535 dump_minipool (last_barrier->insn);
12539 /* From now on we must synthesize any constants that we can't handle
12540 directly. This can happen if the RTL gets split during final
12541 instruction generation. */
12542 after_arm_reorg = 1;
12544 /* Free the minipool memory. */
12545 obstack_free (&minipool_obstack, minipool_startobj);
12548 /* Routines to output assembly language. */
12550 /* If the rtx is the correct value then return the string of the number.
12551 In this way we can ensure that valid double constants are generated even
12552 when cross compiling. */
12554 fp_immediate_constant (rtx x)
12559 if (!fp_consts_inited)
12562 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12563 for (i = 0; i < 8; i++)
12564 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12565 return strings_fp[i];
12567 gcc_unreachable ();
12570 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12571 static const char *
12572 fp_const_from_val (REAL_VALUE_TYPE *r)
12576 if (!fp_consts_inited)
12579 for (i = 0; i < 8; i++)
12580 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12581 return strings_fp[i];
12583 gcc_unreachable ();
12586 /* Output the operands of a LDM/STM instruction to STREAM.
12587 MASK is the ARM register set mask of which only bits 0-15 are important.
12588 REG is the base register, either the frame pointer or the stack pointer,
12589 INSTR is the possibly suffixed load or store instruction.
12590 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12593 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12594 unsigned long mask, int rfe)
12597 bool not_first = FALSE;
12599 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12600 fputc ('\t', stream);
12601 asm_fprintf (stream, instr, reg);
12602 fputc ('{', stream);
12604 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12605 if (mask & (1 << i))
12608 fprintf (stream, ", ");
12610 asm_fprintf (stream, "%r", i);
12615 fprintf (stream, "}^\n");
12617 fprintf (stream, "}\n");
12621 /* Output a FLDMD instruction to STREAM.
12622 BASE if the register containing the address.
12623 REG and COUNT specify the register range.
12624 Extra registers may be added to avoid hardware bugs.
12626 We output FLDMD even for ARMv5 VFP implementations. Although
12627 FLDMD is technically not supported until ARMv6, it is believed
12628 that all VFP implementations support its use in this context. */
12631 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12635 /* Workaround ARM10 VFPr1 bug. */
12636 if (count == 2 && !arm_arch6)
12643 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12644 load into multiple parts if we have to handle more than 16 registers. */
12647 vfp_output_fldmd (stream, base, reg, 16);
12648 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12652 fputc ('\t', stream);
12653 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12655 for (i = reg; i < reg + count; i++)
12658 fputs (", ", stream);
12659 asm_fprintf (stream, "d%d", i);
12661 fputs ("}\n", stream);
12666 /* Output the assembly for a store multiple. */
12669 vfp_output_fstmd (rtx * operands)
12676 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12677 p = strlen (pattern);
12679 gcc_assert (GET_CODE (operands[1]) == REG);
12681 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12682 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12684 p += sprintf (&pattern[p], ", d%d", base + i);
12686 strcpy (&pattern[p], "}");
12688 output_asm_insn (pattern, operands);
12693 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12694 number of bytes pushed. */
12697 vfp_emit_fstmd (int base_reg, int count)
12704 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12705 register pairs are stored by a store multiple insn. We avoid this
12706 by pushing an extra pair. */
12707 if (count == 2 && !arm_arch6)
12709 if (base_reg == LAST_VFP_REGNUM - 3)
12714 /* FSTMD may not store more than 16 doubleword registers at once. Split
12715 larger stores into multiple parts (up to a maximum of two, in
12720 /* NOTE: base_reg is an internal register number, so each D register
12722 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12723 saved += vfp_emit_fstmd (base_reg, 16);
12727 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12728 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12730 reg = gen_rtx_REG (DFmode, base_reg);
12733 XVECEXP (par, 0, 0)
12734 = gen_rtx_SET (VOIDmode,
12737 gen_rtx_PRE_MODIFY (Pmode,
12740 (stack_pointer_rtx,
12743 gen_rtx_UNSPEC (BLKmode,
12744 gen_rtvec (1, reg),
12745 UNSPEC_PUSH_MULT));
12747 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12748 plus_constant (stack_pointer_rtx, -(count * 8)));
12749 RTX_FRAME_RELATED_P (tmp) = 1;
12750 XVECEXP (dwarf, 0, 0) = tmp;
12752 tmp = gen_rtx_SET (VOIDmode,
12753 gen_frame_mem (DFmode, stack_pointer_rtx),
12755 RTX_FRAME_RELATED_P (tmp) = 1;
12756 XVECEXP (dwarf, 0, 1) = tmp;
12758 for (i = 1; i < count; i++)
12760 reg = gen_rtx_REG (DFmode, base_reg);
12762 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12764 tmp = gen_rtx_SET (VOIDmode,
12765 gen_frame_mem (DFmode,
12766 plus_constant (stack_pointer_rtx,
12769 RTX_FRAME_RELATED_P (tmp) = 1;
12770 XVECEXP (dwarf, 0, i + 1) = tmp;
12773 par = emit_insn (par);
12774 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12775 RTX_FRAME_RELATED_P (par) = 1;
12780 /* Emit a call instruction with pattern PAT. ADDR is the address of
12781 the call target. */
12784 arm_emit_call_insn (rtx pat, rtx addr)
12788 insn = emit_call_insn (pat);
12790 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12791 If the call might use such an entry, add a use of the PIC register
12792 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12793 if (TARGET_VXWORKS_RTP
12795 && GET_CODE (addr) == SYMBOL_REF
12796 && (SYMBOL_REF_DECL (addr)
12797 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12798 : !SYMBOL_REF_LOCAL_P (addr)))
12800 require_pic_register ();
12801 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12805 /* Output a 'call' insn. */
12807 output_call (rtx *operands)
12809 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12811 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12812 if (REGNO (operands[0]) == LR_REGNUM)
12814 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12815 output_asm_insn ("mov%?\t%0, %|lr", operands);
12818 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12820 if (TARGET_INTERWORK || arm_arch4t)
12821 output_asm_insn ("bx%?\t%0", operands);
12823 output_asm_insn ("mov%?\t%|pc, %0", operands);
12828 /* Output a 'call' insn that is a reference in memory. This is
12829 disabled for ARMv5 and we prefer a blx instead because otherwise
12830 there's a significant performance overhead. */
12832 output_call_mem (rtx *operands)
12834 gcc_assert (!arm_arch5);
12835 if (TARGET_INTERWORK)
12837 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12838 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12839 output_asm_insn ("bx%?\t%|ip", operands);
12841 else if (regno_use_in (LR_REGNUM, operands[0]))
12843 /* LR is used in the memory address. We load the address in the
12844 first instruction. It's safe to use IP as the target of the
12845 load since the call will kill it anyway. */
12846 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12847 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12849 output_asm_insn ("bx%?\t%|ip", operands);
12851 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12855 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12856 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12863 /* Output a move from arm registers to an fpa registers.
12864 OPERANDS[0] is an fpa register.
12865 OPERANDS[1] is the first registers of an arm register pair. */
12867 output_mov_long_double_fpa_from_arm (rtx *operands)
12869 int arm_reg0 = REGNO (operands[1]);
12872 gcc_assert (arm_reg0 != IP_REGNUM);
12874 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12875 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12876 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12878 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12879 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12884 /* Output a move from an fpa register to arm registers.
12885 OPERANDS[0] is the first registers of an arm register pair.
12886 OPERANDS[1] is an fpa register. */
12888 output_mov_long_double_arm_from_fpa (rtx *operands)
12890 int arm_reg0 = REGNO (operands[0]);
12893 gcc_assert (arm_reg0 != IP_REGNUM);
12895 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12896 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12897 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12899 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12900 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12904 /* Output a move from arm registers to arm registers of a long double
12905 OPERANDS[0] is the destination.
12906 OPERANDS[1] is the source. */
12908 output_mov_long_double_arm_from_arm (rtx *operands)
12910 /* We have to be careful here because the two might overlap. */
12911 int dest_start = REGNO (operands[0]);
12912 int src_start = REGNO (operands[1]);
12916 if (dest_start < src_start)
12918 for (i = 0; i < 3; i++)
12920 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12921 ops[1] = gen_rtx_REG (SImode, src_start + i);
12922 output_asm_insn ("mov%?\t%0, %1", ops);
12927 for (i = 2; i >= 0; i--)
12929 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12930 ops[1] = gen_rtx_REG (SImode, src_start + i);
12931 output_asm_insn ("mov%?\t%0, %1", ops);
12939 arm_emit_movpair (rtx dest, rtx src)
12941 /* If the src is an immediate, simplify it. */
12942 if (CONST_INT_P (src))
12944 HOST_WIDE_INT val = INTVAL (src);
12945 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12946 if ((val >> 16) & 0x0000ffff)
12947 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12949 GEN_INT ((val >> 16) & 0x0000ffff));
12952 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12953 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12956 /* Output a move from arm registers to an fpa registers.
12957 OPERANDS[0] is an fpa register.
12958 OPERANDS[1] is the first registers of an arm register pair. */
12960 output_mov_double_fpa_from_arm (rtx *operands)
12962 int arm_reg0 = REGNO (operands[1]);
12965 gcc_assert (arm_reg0 != IP_REGNUM);
12967 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12968 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12969 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12970 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12974 /* Output a move from an fpa register to arm registers.
12975 OPERANDS[0] is the first registers of an arm register pair.
12976 OPERANDS[1] is an fpa register. */
12978 output_mov_double_arm_from_fpa (rtx *operands)
12980 int arm_reg0 = REGNO (operands[0]);
12983 gcc_assert (arm_reg0 != IP_REGNUM);
12985 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12986 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12987 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12988 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12992 /* Output a move between double words. It must be REG<-MEM
12995 output_move_double (rtx *operands)
12997 enum rtx_code code0 = GET_CODE (operands[0]);
12998 enum rtx_code code1 = GET_CODE (operands[1]);
13003 unsigned int reg0 = REGNO (operands[0]);
13005 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13007 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13009 switch (GET_CODE (XEXP (operands[1], 0)))
13013 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13014 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13016 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13020 gcc_assert (TARGET_LDRD);
13021 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13026 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13028 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13033 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13035 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13039 gcc_assert (TARGET_LDRD);
13040 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13045 /* Autoicrement addressing modes should never have overlapping
13046 base and destination registers, and overlapping index registers
13047 are already prohibited, so this doesn't need to worry about
13049 otherops[0] = operands[0];
13050 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13051 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13053 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13055 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13057 /* Registers overlap so split out the increment. */
13058 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13059 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13063 /* Use a single insn if we can.
13064 FIXME: IWMMXT allows offsets larger than ldrd can
13065 handle, fix these up with a pair of ldr. */
13067 || GET_CODE (otherops[2]) != CONST_INT
13068 || (INTVAL (otherops[2]) > -256
13069 && INTVAL (otherops[2]) < 256))
13070 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13073 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13074 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13080 /* Use a single insn if we can.
13081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13082 fix these up with a pair of ldr. */
13084 || GET_CODE (otherops[2]) != CONST_INT
13085 || (INTVAL (otherops[2]) > -256
13086 && INTVAL (otherops[2]) < 256))
13087 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13090 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13091 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13098 /* We might be able to use ldrd %0, %1 here. However the range is
13099 different to ldr/adr, and it is broken on some ARMv7-M
13100 implementations. */
13101 /* Use the second register of the pair to avoid problematic
13103 otherops[1] = operands[1];
13104 output_asm_insn ("adr%?\t%0, %1", otherops);
13105 operands[1] = otherops[0];
13107 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13109 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13112 /* ??? This needs checking for thumb2. */
13114 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13115 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13117 otherops[0] = operands[0];
13118 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13119 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13121 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13123 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13125 switch ((int) INTVAL (otherops[2]))
13128 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13133 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13138 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13142 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13143 operands[1] = otherops[0];
13145 && (GET_CODE (otherops[2]) == REG
13147 || (GET_CODE (otherops[2]) == CONST_INT
13148 && INTVAL (otherops[2]) > -256
13149 && INTVAL (otherops[2]) < 256)))
13151 if (reg_overlap_mentioned_p (operands[0],
13155 /* Swap base and index registers over to
13156 avoid a conflict. */
13158 otherops[1] = otherops[2];
13161 /* If both registers conflict, it will usually
13162 have been fixed by a splitter. */
13163 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13164 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13166 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13167 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13171 otherops[0] = operands[0];
13172 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13177 if (GET_CODE (otherops[2]) == CONST_INT)
13179 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13180 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13182 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13185 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13188 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13191 return "ldr%(d%)\t%0, [%1]";
13193 return "ldm%(ia%)\t%1, %M0";
13197 otherops[1] = adjust_address (operands[1], SImode, 4);
13198 /* Take care of overlapping base/data reg. */
13199 if (reg_mentioned_p (operands[0], operands[1]))
13201 output_asm_insn ("ldr%?\t%0, %1", otherops);
13202 output_asm_insn ("ldr%?\t%0, %1", operands);
13206 output_asm_insn ("ldr%?\t%0, %1", operands);
13207 output_asm_insn ("ldr%?\t%0, %1", otherops);
13214 /* Constraints should ensure this. */
13215 gcc_assert (code0 == MEM && code1 == REG);
13216 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13218 switch (GET_CODE (XEXP (operands[0], 0)))
13222 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13224 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13228 gcc_assert (TARGET_LDRD);
13229 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13234 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13236 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13241 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13243 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13247 gcc_assert (TARGET_LDRD);
13248 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13253 otherops[0] = operands[1];
13254 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13255 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13257 /* IWMMXT allows offsets larger than ldrd can handle,
13258 fix these up with a pair of ldr. */
13260 && GET_CODE (otherops[2]) == CONST_INT
13261 && (INTVAL(otherops[2]) <= -256
13262 || INTVAL(otherops[2]) >= 256))
13264 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13266 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13267 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13271 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13272 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13275 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13276 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13278 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13282 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13283 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13285 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13288 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13294 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13300 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13305 && (GET_CODE (otherops[2]) == REG
13307 || (GET_CODE (otherops[2]) == CONST_INT
13308 && INTVAL (otherops[2]) > -256
13309 && INTVAL (otherops[2]) < 256)))
13311 otherops[0] = operands[1];
13312 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13313 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13319 otherops[0] = adjust_address (operands[0], SImode, 4);
13320 otherops[1] = operands[1];
13321 output_asm_insn ("str%?\t%1, %0", operands);
13322 output_asm_insn ("str%?\t%H1, %0", otherops);
13329 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13330 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13333 output_move_quad (rtx *operands)
13335 if (REG_P (operands[0]))
13337 /* Load, or reg->reg move. */
13339 if (MEM_P (operands[1]))
13341 switch (GET_CODE (XEXP (operands[1], 0)))
13344 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13349 output_asm_insn ("adr%?\t%0, %1", operands);
13350 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13354 gcc_unreachable ();
13362 gcc_assert (REG_P (operands[1]));
13364 dest = REGNO (operands[0]);
13365 src = REGNO (operands[1]);
13367 /* This seems pretty dumb, but hopefully GCC won't try to do it
13370 for (i = 0; i < 4; i++)
13372 ops[0] = gen_rtx_REG (SImode, dest + i);
13373 ops[1] = gen_rtx_REG (SImode, src + i);
13374 output_asm_insn ("mov%?\t%0, %1", ops);
13377 for (i = 3; i >= 0; i--)
13379 ops[0] = gen_rtx_REG (SImode, dest + i);
13380 ops[1] = gen_rtx_REG (SImode, src + i);
13381 output_asm_insn ("mov%?\t%0, %1", ops);
13387 gcc_assert (MEM_P (operands[0]));
13388 gcc_assert (REG_P (operands[1]));
13389 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13391 switch (GET_CODE (XEXP (operands[0], 0)))
13394 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13398 gcc_unreachable ();
13405 /* Output a VFP load or store instruction. */
13408 output_move_vfp (rtx *operands)
13410 rtx reg, mem, addr, ops[2];
13411 int load = REG_P (operands[0]);
13412 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13413 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13416 enum machine_mode mode;
13418 reg = operands[!load];
13419 mem = operands[load];
13421 mode = GET_MODE (reg);
13423 gcc_assert (REG_P (reg));
13424 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13425 gcc_assert (mode == SFmode
13429 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13430 gcc_assert (MEM_P (mem));
13432 addr = XEXP (mem, 0);
13434 switch (GET_CODE (addr))
13437 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13438 ops[0] = XEXP (addr, 0);
13443 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13444 ops[0] = XEXP (addr, 0);
13449 templ = "f%s%c%%?\t%%%s0, %%1%s";
13455 sprintf (buff, templ,
13456 load ? "ld" : "st",
13459 integer_p ? "\t%@ int" : "");
13460 output_asm_insn (buff, ops);
13465 /* Output a Neon quad-word load or store, or a load or store for
13466 larger structure modes.
13468 WARNING: The ordering of elements is weird in big-endian mode,
13469 because we use VSTM, as required by the EABI. GCC RTL defines
13470 element ordering based on in-memory order. This can be differ
13471 from the architectural ordering of elements within a NEON register.
13472 The intrinsics defined in arm_neon.h use the NEON register element
13473 ordering, not the GCC RTL element ordering.
13475 For example, the in-memory ordering of a big-endian a quadword
13476 vector with 16-bit elements when stored from register pair {d0,d1}
13477 will be (lowest address first, d0[N] is NEON register element N):
13479 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13481 When necessary, quadword registers (dN, dN+1) are moved to ARM
13482 registers from rN in the order:
13484 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13486 So that STM/LDM can be used on vectors in ARM registers, and the
13487 same memory layout will result as if VSTM/VLDM were used. */
13490 output_move_neon (rtx *operands)
13492 rtx reg, mem, addr, ops[2];
13493 int regno, load = REG_P (operands[0]);
13496 enum machine_mode mode;
13498 reg = operands[!load];
13499 mem = operands[load];
13501 mode = GET_MODE (reg);
13503 gcc_assert (REG_P (reg));
13504 regno = REGNO (reg);
13505 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13506 || NEON_REGNO_OK_FOR_QUAD (regno));
13507 gcc_assert (VALID_NEON_DREG_MODE (mode)
13508 || VALID_NEON_QREG_MODE (mode)
13509 || VALID_NEON_STRUCT_MODE (mode));
13510 gcc_assert (MEM_P (mem));
13512 addr = XEXP (mem, 0);
13514 /* Strip off const from addresses like (const (plus (...))). */
13515 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13516 addr = XEXP (addr, 0);
13518 switch (GET_CODE (addr))
13521 templ = "v%smia%%?\t%%0!, %%h1";
13522 ops[0] = XEXP (addr, 0);
13527 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13528 templ = "v%smdb%%?\t%%0!, %%h1";
13529 ops[0] = XEXP (addr, 0);
13534 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13535 gcc_unreachable ();
13540 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13543 for (i = 0; i < nregs; i++)
13545 /* We're only using DImode here because it's a convenient size. */
13546 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13547 ops[1] = adjust_address (mem, DImode, 8 * i);
13548 if (reg_overlap_mentioned_p (ops[0], mem))
13550 gcc_assert (overlap == -1);
13555 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13556 output_asm_insn (buff, ops);
13561 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13562 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13563 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13564 output_asm_insn (buff, ops);
13571 templ = "v%smia%%?\t%%m0, %%h1";
13576 sprintf (buff, templ, load ? "ld" : "st");
13577 output_asm_insn (buff, ops);
13582 /* Compute and return the length of neon_mov<mode>, where <mode> is
13583 one of VSTRUCT modes: EI, OI, CI or XI. */
13585 arm_attr_length_move_neon (rtx insn)
13587 rtx reg, mem, addr;
13589 enum machine_mode mode;
13591 extract_insn_cached (insn);
13593 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13595 mode = GET_MODE (recog_data.operand[0]);
13606 gcc_unreachable ();
13610 load = REG_P (recog_data.operand[0]);
13611 reg = recog_data.operand[!load];
13612 mem = recog_data.operand[load];
13614 gcc_assert (MEM_P (mem));
13616 mode = GET_MODE (reg);
13617 addr = XEXP (mem, 0);
13619 /* Strip off const from addresses like (const (plus (...))). */
13620 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13621 addr = XEXP (addr, 0);
13623 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13625 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13632 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13636 arm_address_offset_is_imm (rtx insn)
13640 extract_insn_cached (insn);
13642 if (REG_P (recog_data.operand[0]))
13645 mem = recog_data.operand[0];
13647 gcc_assert (MEM_P (mem));
13649 addr = XEXP (mem, 0);
13651 if (GET_CODE (addr) == REG
13652 || (GET_CODE (addr) == PLUS
13653 && GET_CODE (XEXP (addr, 0)) == REG
13654 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13660 /* Output an ADD r, s, #n where n may be too big for one instruction.
13661 If adding zero to one register, output nothing. */
13663 output_add_immediate (rtx *operands)
13665 HOST_WIDE_INT n = INTVAL (operands[2]);
13667 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13670 output_multi_immediate (operands,
13671 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13674 output_multi_immediate (operands,
13675 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13682 /* Output a multiple immediate operation.
13683 OPERANDS is the vector of operands referred to in the output patterns.
13684 INSTR1 is the output pattern to use for the first constant.
13685 INSTR2 is the output pattern to use for subsequent constants.
13686 IMMED_OP is the index of the constant slot in OPERANDS.
13687 N is the constant value. */
13688 static const char *
13689 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13690 int immed_op, HOST_WIDE_INT n)
13692 #if HOST_BITS_PER_WIDE_INT > 32
13698 /* Quick and easy output. */
13699 operands[immed_op] = const0_rtx;
13700 output_asm_insn (instr1, operands);
13705 const char * instr = instr1;
13707 /* Note that n is never zero here (which would give no output). */
13708 for (i = 0; i < 32; i += 2)
13712 operands[immed_op] = GEN_INT (n & (255 << i));
13713 output_asm_insn (instr, operands);
13723 /* Return the name of a shifter operation. */
13724 static const char *
13725 arm_shift_nmem(enum rtx_code code)
13730 return ARM_LSL_NAME;
13746 /* Return the appropriate ARM instruction for the operation code.
13747 The returned result should not be overwritten. OP is the rtx of the
13748 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13751 arithmetic_instr (rtx op, int shift_first_arg)
13753 switch (GET_CODE (op))
13759 return shift_first_arg ? "rsb" : "sub";
13774 return arm_shift_nmem(GET_CODE(op));
13777 gcc_unreachable ();
13781 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13782 for the operation code. The returned result should not be overwritten.
13783 OP is the rtx code of the shift.
13784 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13786 static const char *
13787 shift_op (rtx op, HOST_WIDE_INT *amountp)
13790 enum rtx_code code = GET_CODE (op);
13792 switch (GET_CODE (XEXP (op, 1)))
13800 *amountp = INTVAL (XEXP (op, 1));
13804 gcc_unreachable ();
13810 gcc_assert (*amountp != -1);
13811 *amountp = 32 - *amountp;
13814 /* Fall through. */
13820 mnem = arm_shift_nmem(code);
13824 /* We never have to worry about the amount being other than a
13825 power of 2, since this case can never be reloaded from a reg. */
13826 gcc_assert (*amountp != -1);
13827 *amountp = int_log2 (*amountp);
13828 return ARM_LSL_NAME;
13831 gcc_unreachable ();
13834 if (*amountp != -1)
13836 /* This is not 100% correct, but follows from the desire to merge
13837 multiplication by a power of 2 with the recognizer for a
13838 shift. >=32 is not a valid shift for "lsl", so we must try and
13839 output a shift that produces the correct arithmetical result.
13840 Using lsr #32 is identical except for the fact that the carry bit
13841 is not set correctly if we set the flags; but we never use the
13842 carry bit from such an operation, so we can ignore that. */
13843 if (code == ROTATERT)
13844 /* Rotate is just modulo 32. */
13846 else if (*amountp != (*amountp & 31))
13848 if (code == ASHIFT)
13853 /* Shifts of 0 are no-ops. */
13861 /* Obtain the shift from the POWER of two. */
13863 static HOST_WIDE_INT
13864 int_log2 (HOST_WIDE_INT power)
13866 HOST_WIDE_INT shift = 0;
13868 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13870 gcc_assert (shift <= 31);
13877 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13878 because /bin/as is horribly restrictive. The judgement about
13879 whether or not each character is 'printable' (and can be output as
13880 is) or not (and must be printed with an octal escape) must be made
13881 with reference to the *host* character set -- the situation is
13882 similar to that discussed in the comments above pp_c_char in
13883 c-pretty-print.c. */
13885 #define MAX_ASCII_LEN 51
13888 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13891 int len_so_far = 0;
13893 fputs ("\t.ascii\t\"", stream);
13895 for (i = 0; i < len; i++)
13899 if (len_so_far >= MAX_ASCII_LEN)
13901 fputs ("\"\n\t.ascii\t\"", stream);
13907 if (c == '\\' || c == '\"')
13909 putc ('\\', stream);
13917 fprintf (stream, "\\%03o", c);
13922 fputs ("\"\n", stream);
13925 /* Compute the register save mask for registers 0 through 12
13926 inclusive. This code is used by arm_compute_save_reg_mask. */
13928 static unsigned long
13929 arm_compute_save_reg0_reg12_mask (void)
13931 unsigned long func_type = arm_current_func_type ();
13932 unsigned long save_reg_mask = 0;
13935 if (IS_INTERRUPT (func_type))
13937 unsigned int max_reg;
13938 /* Interrupt functions must not corrupt any registers,
13939 even call clobbered ones. If this is a leaf function
13940 we can just examine the registers used by the RTL, but
13941 otherwise we have to assume that whatever function is
13942 called might clobber anything, and so we have to save
13943 all the call-clobbered registers as well. */
13944 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13945 /* FIQ handlers have registers r8 - r12 banked, so
13946 we only need to check r0 - r7, Normal ISRs only
13947 bank r14 and r15, so we must check up to r12.
13948 r13 is the stack pointer which is always preserved,
13949 so we do not need to consider it here. */
13954 for (reg = 0; reg <= max_reg; reg++)
13955 if (df_regs_ever_live_p (reg)
13956 || (! current_function_is_leaf && call_used_regs[reg]))
13957 save_reg_mask |= (1 << reg);
13959 /* Also save the pic base register if necessary. */
13961 && !TARGET_SINGLE_PIC_BASE
13962 && arm_pic_register != INVALID_REGNUM
13963 && crtl->uses_pic_offset_table)
13964 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13966 else if (IS_VOLATILE(func_type))
13968 /* For noreturn functions we historically omitted register saves
13969 altogether. However this really messes up debugging. As a
13970 compromise save just the frame pointers. Combined with the link
13971 register saved elsewhere this should be sufficient to get
13973 if (frame_pointer_needed)
13974 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13975 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13976 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13977 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13978 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13982 /* In the normal case we only need to save those registers
13983 which are call saved and which are used by this function. */
13984 for (reg = 0; reg <= 11; reg++)
13985 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13986 save_reg_mask |= (1 << reg);
13988 /* Handle the frame pointer as a special case. */
13989 if (frame_pointer_needed)
13990 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13992 /* If we aren't loading the PIC register,
13993 don't stack it even though it may be live. */
13995 && !TARGET_SINGLE_PIC_BASE
13996 && arm_pic_register != INVALID_REGNUM
13997 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13998 || crtl->uses_pic_offset_table))
13999 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14001 /* The prologue will copy SP into R0, so save it. */
14002 if (IS_STACKALIGN (func_type))
14003 save_reg_mask |= 1;
14006 /* Save registers so the exception handler can modify them. */
14007 if (crtl->calls_eh_return)
14013 reg = EH_RETURN_DATA_REGNO (i);
14014 if (reg == INVALID_REGNUM)
14016 save_reg_mask |= 1 << reg;
14020 return save_reg_mask;
14024 /* Compute the number of bytes used to store the static chain register on the
14025 stack, above the stack frame. We need to know this accurately to get the
14026 alignment of the rest of the stack frame correct. */
14028 static int arm_compute_static_chain_stack_bytes (void)
14030 unsigned long func_type = arm_current_func_type ();
14031 int static_chain_stack_bytes = 0;
14033 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
14034 IS_NESTED (func_type) &&
14035 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14036 static_chain_stack_bytes = 4;
14038 return static_chain_stack_bytes;
14042 /* Compute a bit mask of which registers need to be
14043 saved on the stack for the current function.
14044 This is used by arm_get_frame_offsets, which may add extra registers. */
14046 static unsigned long
14047 arm_compute_save_reg_mask (void)
14049 unsigned int save_reg_mask = 0;
14050 unsigned long func_type = arm_current_func_type ();
14053 if (IS_NAKED (func_type))
14054 /* This should never really happen. */
14057 /* If we are creating a stack frame, then we must save the frame pointer,
14058 IP (which will hold the old stack pointer), LR and the PC. */
14059 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14061 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14064 | (1 << PC_REGNUM);
14066 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14068 /* Decide if we need to save the link register.
14069 Interrupt routines have their own banked link register,
14070 so they never need to save it.
14071 Otherwise if we do not use the link register we do not need to save
14072 it. If we are pushing other registers onto the stack however, we
14073 can save an instruction in the epilogue by pushing the link register
14074 now and then popping it back into the PC. This incurs extra memory
14075 accesses though, so we only do it when optimizing for size, and only
14076 if we know that we will not need a fancy return sequence. */
14077 if (df_regs_ever_live_p (LR_REGNUM)
14080 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14081 && !crtl->calls_eh_return))
14082 save_reg_mask |= 1 << LR_REGNUM;
14084 if (cfun->machine->lr_save_eliminated)
14085 save_reg_mask &= ~ (1 << LR_REGNUM);
14087 if (TARGET_REALLY_IWMMXT
14088 && ((bit_count (save_reg_mask)
14089 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14090 arm_compute_static_chain_stack_bytes())
14093 /* The total number of registers that are going to be pushed
14094 onto the stack is odd. We need to ensure that the stack
14095 is 64-bit aligned before we start to save iWMMXt registers,
14096 and also before we start to create locals. (A local variable
14097 might be a double or long long which we will load/store using
14098 an iWMMXt instruction). Therefore we need to push another
14099 ARM register, so that the stack will be 64-bit aligned. We
14100 try to avoid using the arg registers (r0 -r3) as they might be
14101 used to pass values in a tail call. */
14102 for (reg = 4; reg <= 12; reg++)
14103 if ((save_reg_mask & (1 << reg)) == 0)
14107 save_reg_mask |= (1 << reg);
14110 cfun->machine->sibcall_blocked = 1;
14111 save_reg_mask |= (1 << 3);
14115 /* We may need to push an additional register for use initializing the
14116 PIC base register. */
14117 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14118 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14120 reg = thumb_find_work_register (1 << 4);
14121 if (!call_used_regs[reg])
14122 save_reg_mask |= (1 << reg);
14125 return save_reg_mask;
14129 /* Compute a bit mask of which registers need to be
14130 saved on the stack for the current function. */
14131 static unsigned long
14132 thumb1_compute_save_reg_mask (void)
14134 unsigned long mask;
14138 for (reg = 0; reg < 12; reg ++)
14139 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14143 && !TARGET_SINGLE_PIC_BASE
14144 && arm_pic_register != INVALID_REGNUM
14145 && crtl->uses_pic_offset_table)
14146 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14148 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14149 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14150 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14152 /* LR will also be pushed if any lo regs are pushed. */
14153 if (mask & 0xff || thumb_force_lr_save ())
14154 mask |= (1 << LR_REGNUM);
14156 /* Make sure we have a low work register if we need one.
14157 We will need one if we are going to push a high register,
14158 but we are not currently intending to push a low register. */
14159 if ((mask & 0xff) == 0
14160 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14162 /* Use thumb_find_work_register to choose which register
14163 we will use. If the register is live then we will
14164 have to push it. Use LAST_LO_REGNUM as our fallback
14165 choice for the register to select. */
14166 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14167 /* Make sure the register returned by thumb_find_work_register is
14168 not part of the return value. */
14169 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14170 reg = LAST_LO_REGNUM;
14172 if (! call_used_regs[reg])
14176 /* The 504 below is 8 bytes less than 512 because there are two possible
14177 alignment words. We can't tell here if they will be present or not so we
14178 have to play it safe and assume that they are. */
14179 if ((CALLER_INTERWORKING_SLOT_SIZE +
14180 ROUND_UP_WORD (get_frame_size ()) +
14181 crtl->outgoing_args_size) >= 504)
14183 /* This is the same as the code in thumb1_expand_prologue() which
14184 determines which register to use for stack decrement. */
14185 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14186 if (mask & (1 << reg))
14189 if (reg > LAST_LO_REGNUM)
14191 /* Make sure we have a register available for stack decrement. */
14192 mask |= 1 << LAST_LO_REGNUM;
14200 /* Return the number of bytes required to save VFP registers. */
14202 arm_get_vfp_saved_size (void)
14204 unsigned int regno;
14209 /* Space for saved VFP registers. */
14210 if (TARGET_HARD_FLOAT && TARGET_VFP)
14213 for (regno = FIRST_VFP_REGNUM;
14214 regno < LAST_VFP_REGNUM;
14217 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14218 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14222 /* Workaround ARM10 VFPr1 bug. */
14223 if (count == 2 && !arm_arch6)
14225 saved += count * 8;
14234 if (count == 2 && !arm_arch6)
14236 saved += count * 8;
14243 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14244 everything bar the final return instruction. */
14246 output_return_instruction (rtx operand, int really_return, int reverse)
14248 char conditional[10];
14251 unsigned long live_regs_mask;
14252 unsigned long func_type;
14253 arm_stack_offsets *offsets;
14255 func_type = arm_current_func_type ();
14257 if (IS_NAKED (func_type))
14260 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14262 /* If this function was declared non-returning, and we have
14263 found a tail call, then we have to trust that the called
14264 function won't return. */
14269 /* Otherwise, trap an attempted return by aborting. */
14271 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14273 assemble_external_libcall (ops[1]);
14274 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14280 gcc_assert (!cfun->calls_alloca || really_return);
14282 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14284 cfun->machine->return_used_this_function = 1;
14286 offsets = arm_get_frame_offsets ();
14287 live_regs_mask = offsets->saved_regs_mask;
14289 if (live_regs_mask)
14291 const char * return_reg;
14293 /* If we do not have any special requirements for function exit
14294 (e.g. interworking) then we can load the return address
14295 directly into the PC. Otherwise we must load it into LR. */
14297 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14298 return_reg = reg_names[PC_REGNUM];
14300 return_reg = reg_names[LR_REGNUM];
14302 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14304 /* There are three possible reasons for the IP register
14305 being saved. 1) a stack frame was created, in which case
14306 IP contains the old stack pointer, or 2) an ISR routine
14307 corrupted it, or 3) it was saved to align the stack on
14308 iWMMXt. In case 1, restore IP into SP, otherwise just
14310 if (frame_pointer_needed)
14312 live_regs_mask &= ~ (1 << IP_REGNUM);
14313 live_regs_mask |= (1 << SP_REGNUM);
14316 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14319 /* On some ARM architectures it is faster to use LDR rather than
14320 LDM to load a single register. On other architectures, the
14321 cost is the same. In 26 bit mode, or for exception handlers,
14322 we have to use LDM to load the PC so that the CPSR is also
14324 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14325 if (live_regs_mask == (1U << reg))
14328 if (reg <= LAST_ARM_REGNUM
14329 && (reg != LR_REGNUM
14331 || ! IS_INTERRUPT (func_type)))
14333 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14334 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14341 /* Generate the load multiple instruction to restore the
14342 registers. Note we can get here, even if
14343 frame_pointer_needed is true, but only if sp already
14344 points to the base of the saved core registers. */
14345 if (live_regs_mask & (1 << SP_REGNUM))
14347 unsigned HOST_WIDE_INT stack_adjust;
14349 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14350 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14352 if (stack_adjust && arm_arch5 && TARGET_ARM)
14353 if (TARGET_UNIFIED_ASM)
14354 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14356 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14359 /* If we can't use ldmib (SA110 bug),
14360 then try to pop r3 instead. */
14362 live_regs_mask |= 1 << 3;
14364 if (TARGET_UNIFIED_ASM)
14365 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14367 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14371 if (TARGET_UNIFIED_ASM)
14372 sprintf (instr, "pop%s\t{", conditional);
14374 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14376 p = instr + strlen (instr);
14378 for (reg = 0; reg <= SP_REGNUM; reg++)
14379 if (live_regs_mask & (1 << reg))
14381 int l = strlen (reg_names[reg]);
14387 memcpy (p, ", ", 2);
14391 memcpy (p, "%|", 2);
14392 memcpy (p + 2, reg_names[reg], l);
14396 if (live_regs_mask & (1 << LR_REGNUM))
14398 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14399 /* If returning from an interrupt, restore the CPSR. */
14400 if (IS_INTERRUPT (func_type))
14407 output_asm_insn (instr, & operand);
14409 /* See if we need to generate an extra instruction to
14410 perform the actual function return. */
14412 && func_type != ARM_FT_INTERWORKED
14413 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14415 /* The return has already been handled
14416 by loading the LR into the PC. */
14423 switch ((int) ARM_FUNC_TYPE (func_type))
14427 /* ??? This is wrong for unified assembly syntax. */
14428 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14431 case ARM_FT_INTERWORKED:
14432 sprintf (instr, "bx%s\t%%|lr", conditional);
14435 case ARM_FT_EXCEPTION:
14436 /* ??? This is wrong for unified assembly syntax. */
14437 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14441 /* Use bx if it's available. */
14442 if (arm_arch5 || arm_arch4t)
14443 sprintf (instr, "bx%s\t%%|lr", conditional);
14445 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14449 output_asm_insn (instr, & operand);
14455 /* Write the function name into the code section, directly preceding
14456 the function prologue.
14458 Code will be output similar to this:
14460 .ascii "arm_poke_function_name", 0
14463 .word 0xff000000 + (t1 - t0)
14464 arm_poke_function_name
14466 stmfd sp!, {fp, ip, lr, pc}
14469 When performing a stack backtrace, code can inspect the value
14470 of 'pc' stored at 'fp' + 0. If the trace function then looks
14471 at location pc - 12 and the top 8 bits are set, then we know
14472 that there is a function name embedded immediately preceding this
14473 location and has length ((pc[-3]) & 0xff000000).
14475 We assume that pc is declared as a pointer to an unsigned long.
14477 It is of no benefit to output the function name if we are assembling
14478 a leaf function. These function types will not contain a stack
14479 backtrace structure, therefore it is not possible to determine the
14482 arm_poke_function_name (FILE *stream, const char *name)
14484 unsigned long alignlength;
14485 unsigned long length;
14488 length = strlen (name) + 1;
14489 alignlength = ROUND_UP_WORD (length);
14491 ASM_OUTPUT_ASCII (stream, name, length);
14492 ASM_OUTPUT_ALIGN (stream, 2);
14493 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14494 assemble_aligned_integer (UNITS_PER_WORD, x);
14497 /* Place some comments into the assembler stream
14498 describing the current function. */
14500 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14502 unsigned long func_type;
14506 thumb1_output_function_prologue (f, frame_size);
14510 /* Sanity check. */
14511 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14513 func_type = arm_current_func_type ();
14515 switch ((int) ARM_FUNC_TYPE (func_type))
14518 case ARM_FT_NORMAL:
14520 case ARM_FT_INTERWORKED:
14521 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14524 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14527 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14529 case ARM_FT_EXCEPTION:
14530 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14534 if (IS_NAKED (func_type))
14535 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14537 if (IS_VOLATILE (func_type))
14538 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14540 if (IS_NESTED (func_type))
14541 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14542 if (IS_STACKALIGN (func_type))
14543 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14545 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14547 crtl->args.pretend_args_size, frame_size);
14549 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14550 frame_pointer_needed,
14551 cfun->machine->uses_anonymous_args);
14553 if (cfun->machine->lr_save_eliminated)
14554 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14556 if (crtl->calls_eh_return)
14557 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14562 arm_output_epilogue (rtx sibling)
14565 unsigned long saved_regs_mask;
14566 unsigned long func_type;
14567 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14568 frame that is $fp + 4 for a non-variadic function. */
14569 int floats_offset = 0;
14571 FILE * f = asm_out_file;
14572 unsigned int lrm_count = 0;
14573 int really_return = (sibling == NULL);
14575 arm_stack_offsets *offsets;
14577 /* If we have already generated the return instruction
14578 then it is futile to generate anything else. */
14579 if (use_return_insn (FALSE, sibling) &&
14580 (cfun->machine->return_used_this_function != 0))
14583 func_type = arm_current_func_type ();
14585 if (IS_NAKED (func_type))
14586 /* Naked functions don't have epilogues. */
14589 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14593 /* A volatile function should never return. Call abort. */
14594 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14595 assemble_external_libcall (op);
14596 output_asm_insn ("bl\t%a0", &op);
14601 /* If we are throwing an exception, then we really must be doing a
14602 return, so we can't tail-call. */
14603 gcc_assert (!crtl->calls_eh_return || really_return);
14605 offsets = arm_get_frame_offsets ();
14606 saved_regs_mask = offsets->saved_regs_mask;
14609 lrm_count = bit_count (saved_regs_mask);
14611 floats_offset = offsets->saved_args;
14612 /* Compute how far away the floats will be. */
14613 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14614 if (saved_regs_mask & (1 << reg))
14615 floats_offset += 4;
14617 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14619 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14620 int vfp_offset = offsets->frame;
14622 if (TARGET_FPA_EMU2)
14624 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14625 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14627 floats_offset += 12;
14628 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14629 reg, FP_REGNUM, floats_offset - vfp_offset);
14634 start_reg = LAST_FPA_REGNUM;
14636 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14638 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14640 floats_offset += 12;
14642 /* We can't unstack more than four registers at once. */
14643 if (start_reg - reg == 3)
14645 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14646 reg, FP_REGNUM, floats_offset - vfp_offset);
14647 start_reg = reg - 1;
14652 if (reg != start_reg)
14653 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14654 reg + 1, start_reg - reg,
14655 FP_REGNUM, floats_offset - vfp_offset);
14656 start_reg = reg - 1;
14660 /* Just in case the last register checked also needs unstacking. */
14661 if (reg != start_reg)
14662 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14663 reg + 1, start_reg - reg,
14664 FP_REGNUM, floats_offset - vfp_offset);
14667 if (TARGET_HARD_FLOAT && TARGET_VFP)
14671 /* The fldmd insns do not have base+offset addressing
14672 modes, so we use IP to hold the address. */
14673 saved_size = arm_get_vfp_saved_size ();
14675 if (saved_size > 0)
14677 floats_offset += saved_size;
14678 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14679 FP_REGNUM, floats_offset - vfp_offset);
14681 start_reg = FIRST_VFP_REGNUM;
14682 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14684 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14685 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14687 if (start_reg != reg)
14688 vfp_output_fldmd (f, IP_REGNUM,
14689 (start_reg - FIRST_VFP_REGNUM) / 2,
14690 (reg - start_reg) / 2);
14691 start_reg = reg + 2;
14694 if (start_reg != reg)
14695 vfp_output_fldmd (f, IP_REGNUM,
14696 (start_reg - FIRST_VFP_REGNUM) / 2,
14697 (reg - start_reg) / 2);
14702 /* The frame pointer is guaranteed to be non-double-word aligned.
14703 This is because it is set to (old_stack_pointer - 4) and the
14704 old_stack_pointer was double word aligned. Thus the offset to
14705 the iWMMXt registers to be loaded must also be non-double-word
14706 sized, so that the resultant address *is* double-word aligned.
14707 We can ignore floats_offset since that was already included in
14708 the live_regs_mask. */
14709 lrm_count += (lrm_count % 2 ? 2 : 1);
14711 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14712 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14714 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14715 reg, FP_REGNUM, lrm_count * 4);
14720 /* saved_regs_mask should contain the IP, which at the time of stack
14721 frame generation actually contains the old stack pointer. So a
14722 quick way to unwind the stack is just pop the IP register directly
14723 into the stack pointer. */
14724 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14725 saved_regs_mask &= ~ (1 << IP_REGNUM);
14726 saved_regs_mask |= (1 << SP_REGNUM);
14728 /* There are two registers left in saved_regs_mask - LR and PC. We
14729 only need to restore the LR register (the return address), but to
14730 save time we can load it directly into the PC, unless we need a
14731 special function exit sequence, or we are not really returning. */
14733 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14734 && !crtl->calls_eh_return)
14735 /* Delete the LR from the register mask, so that the LR on
14736 the stack is loaded into the PC in the register mask. */
14737 saved_regs_mask &= ~ (1 << LR_REGNUM);
14739 saved_regs_mask &= ~ (1 << PC_REGNUM);
14741 /* We must use SP as the base register, because SP is one of the
14742 registers being restored. If an interrupt or page fault
14743 happens in the ldm instruction, the SP might or might not
14744 have been restored. That would be bad, as then SP will no
14745 longer indicate the safe area of stack, and we can get stack
14746 corruption. Using SP as the base register means that it will
14747 be reset correctly to the original value, should an interrupt
14748 occur. If the stack pointer already points at the right
14749 place, then omit the subtraction. */
14750 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14751 || cfun->calls_alloca)
14752 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14753 4 * bit_count (saved_regs_mask));
14754 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14756 if (IS_INTERRUPT (func_type))
14757 /* Interrupt handlers will have pushed the
14758 IP onto the stack, so restore it now. */
14759 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14763 /* This branch is executed for ARM mode (non-apcs frames) and
14764 Thumb-2 mode. Frame layout is essentially the same for those
14765 cases, except that in ARM mode frame pointer points to the
14766 first saved register, while in Thumb-2 mode the frame pointer points
14767 to the last saved register.
14769 It is possible to make frame pointer point to last saved
14770 register in both cases, and remove some conditionals below.
14771 That means that fp setup in prologue would be just "mov fp, sp"
14772 and sp restore in epilogue would be just "mov sp, fp", whereas
14773 now we have to use add/sub in those cases. However, the value
14774 of that would be marginal, as both mov and add/sub are 32-bit
14775 in ARM mode, and it would require extra conditionals
14776 in arm_expand_prologue to distingish ARM-apcs-frame case
14777 (where frame pointer is required to point at first register)
14778 and ARM-non-apcs-frame. Therefore, such change is postponed
14779 until real need arise. */
14780 unsigned HOST_WIDE_INT amount;
14782 /* Restore stack pointer if necessary. */
14783 if (TARGET_ARM && frame_pointer_needed)
14785 operands[0] = stack_pointer_rtx;
14786 operands[1] = hard_frame_pointer_rtx;
14788 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14789 output_add_immediate (operands);
14793 if (frame_pointer_needed)
14795 /* For Thumb-2 restore sp from the frame pointer.
14796 Operand restrictions mean we have to incrememnt FP, then copy
14798 amount = offsets->locals_base - offsets->saved_regs;
14799 operands[0] = hard_frame_pointer_rtx;
14803 unsigned long count;
14804 operands[0] = stack_pointer_rtx;
14805 amount = offsets->outgoing_args - offsets->saved_regs;
14806 /* pop call clobbered registers if it avoids a
14807 separate stack adjustment. */
14808 count = offsets->saved_regs - offsets->saved_args;
14811 && !crtl->calls_eh_return
14812 && bit_count(saved_regs_mask) * 4 == count
14813 && !IS_INTERRUPT (func_type)
14814 && !crtl->tail_call_emit)
14816 unsigned long mask;
14817 /* Preserve return values, of any size. */
14818 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14820 mask &= ~saved_regs_mask;
14822 while (bit_count (mask) * 4 > amount)
14824 while ((mask & (1 << reg)) == 0)
14826 mask &= ~(1 << reg);
14828 if (bit_count (mask) * 4 == amount) {
14830 saved_regs_mask |= mask;
14837 operands[1] = operands[0];
14838 operands[2] = GEN_INT (amount);
14839 output_add_immediate (operands);
14841 if (frame_pointer_needed)
14842 asm_fprintf (f, "\tmov\t%r, %r\n",
14843 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14846 if (TARGET_FPA_EMU2)
14848 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14849 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14850 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14855 start_reg = FIRST_FPA_REGNUM;
14857 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14859 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14861 if (reg - start_reg == 3)
14863 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14864 start_reg, SP_REGNUM);
14865 start_reg = reg + 1;
14870 if (reg != start_reg)
14871 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14872 start_reg, reg - start_reg,
14875 start_reg = reg + 1;
14879 /* Just in case the last register checked also needs unstacking. */
14880 if (reg != start_reg)
14881 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14882 start_reg, reg - start_reg, SP_REGNUM);
14885 if (TARGET_HARD_FLOAT && TARGET_VFP)
14887 int end_reg = LAST_VFP_REGNUM + 1;
14889 /* Scan the registers in reverse order. We need to match
14890 any groupings made in the prologue and generate matching
14892 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14894 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14895 && (!df_regs_ever_live_p (reg + 1)
14896 || call_used_regs[reg + 1]))
14898 if (end_reg > reg + 2)
14899 vfp_output_fldmd (f, SP_REGNUM,
14900 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14901 (end_reg - (reg + 2)) / 2);
14905 if (end_reg > reg + 2)
14906 vfp_output_fldmd (f, SP_REGNUM, 0,
14907 (end_reg - (reg + 2)) / 2);
14911 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14912 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14913 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14915 /* If we can, restore the LR into the PC. */
14916 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14917 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14918 && !IS_STACKALIGN (func_type)
14920 && crtl->args.pretend_args_size == 0
14921 && saved_regs_mask & (1 << LR_REGNUM)
14922 && !crtl->calls_eh_return)
14924 saved_regs_mask &= ~ (1 << LR_REGNUM);
14925 saved_regs_mask |= (1 << PC_REGNUM);
14926 rfe = IS_INTERRUPT (func_type);
14931 /* Load the registers off the stack. If we only have one register
14932 to load use the LDR instruction - it is faster. For Thumb-2
14933 always use pop and the assembler will pick the best instruction.*/
14934 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14935 && !IS_INTERRUPT(func_type))
14937 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14939 else if (saved_regs_mask)
14941 if (saved_regs_mask & (1 << SP_REGNUM))
14942 /* Note - write back to the stack register is not enabled
14943 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14944 in the list of registers and if we add writeback the
14945 instruction becomes UNPREDICTABLE. */
14946 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14948 else if (TARGET_ARM)
14949 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14952 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14955 if (crtl->args.pretend_args_size)
14957 /* Unwind the pre-pushed regs. */
14958 operands[0] = operands[1] = stack_pointer_rtx;
14959 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14960 output_add_immediate (operands);
14964 /* We may have already restored PC directly from the stack. */
14965 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14968 /* Stack adjustment for exception handler. */
14969 if (crtl->calls_eh_return)
14970 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14971 ARM_EH_STACKADJ_REGNUM);
14973 /* Generate the return instruction. */
14974 switch ((int) ARM_FUNC_TYPE (func_type))
14978 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14981 case ARM_FT_EXCEPTION:
14982 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14985 case ARM_FT_INTERWORKED:
14986 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14990 if (IS_STACKALIGN (func_type))
14992 /* See comment in arm_expand_prologue. */
14993 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14995 if (arm_arch5 || arm_arch4t)
14996 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14998 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15006 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15007 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15009 arm_stack_offsets *offsets;
15015 /* Emit any call-via-reg trampolines that are needed for v4t support
15016 of call_reg and call_value_reg type insns. */
15017 for (regno = 0; regno < LR_REGNUM; regno++)
15019 rtx label = cfun->machine->call_via[regno];
15023 switch_to_section (function_section (current_function_decl));
15024 targetm.asm_out.internal_label (asm_out_file, "L",
15025 CODE_LABEL_NUMBER (label));
15026 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15030 /* ??? Probably not safe to set this here, since it assumes that a
15031 function will be emitted as assembly immediately after we generate
15032 RTL for it. This does not happen for inline functions. */
15033 cfun->machine->return_used_this_function = 0;
15035 else /* TARGET_32BIT */
15037 /* We need to take into account any stack-frame rounding. */
15038 offsets = arm_get_frame_offsets ();
15040 gcc_assert (!use_return_insn (FALSE, NULL)
15041 || (cfun->machine->return_used_this_function != 0)
15042 || offsets->saved_regs == offsets->outgoing_args
15043 || frame_pointer_needed);
15045 /* Reset the ARM-specific per-function variables. */
15046 after_arm_reorg = 0;
15050 /* Generate and emit an insn that we will recognize as a push_multi.
15051 Unfortunately, since this insn does not reflect very well the actual
15052 semantics of the operation, we need to annotate the insn for the benefit
15053 of DWARF2 frame unwind information. */
15055 emit_multi_reg_push (unsigned long mask)
15058 int num_dwarf_regs;
15062 int dwarf_par_index;
15065 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15066 if (mask & (1 << i))
15069 gcc_assert (num_regs && num_regs <= 16);
15071 /* We don't record the PC in the dwarf frame information. */
15072 num_dwarf_regs = num_regs;
15073 if (mask & (1 << PC_REGNUM))
15076 /* For the body of the insn we are going to generate an UNSPEC in
15077 parallel with several USEs. This allows the insn to be recognized
15078 by the push_multi pattern in the arm.md file.
15080 The body of the insn looks something like this:
15083 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15084 (const_int:SI <num>)))
15085 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15091 For the frame note however, we try to be more explicit and actually
15092 show each register being stored into the stack frame, plus a (single)
15093 decrement of the stack pointer. We do it this way in order to be
15094 friendly to the stack unwinding code, which only wants to see a single
15095 stack decrement per instruction. The RTL we generate for the note looks
15096 something like this:
15099 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15100 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15101 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15102 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15106 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15107 instead we'd have a parallel expression detailing all
15108 the stores to the various memory addresses so that debug
15109 information is more up-to-date. Remember however while writing
15110 this to take care of the constraints with the push instruction.
15112 Note also that this has to be taken care of for the VFP registers.
15114 For more see PR43399. */
15116 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15117 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15118 dwarf_par_index = 1;
15120 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15122 if (mask & (1 << i))
15124 reg = gen_rtx_REG (SImode, i);
15126 XVECEXP (par, 0, 0)
15127 = gen_rtx_SET (VOIDmode,
15130 gen_rtx_PRE_MODIFY (Pmode,
15133 (stack_pointer_rtx,
15136 gen_rtx_UNSPEC (BLKmode,
15137 gen_rtvec (1, reg),
15138 UNSPEC_PUSH_MULT));
15140 if (i != PC_REGNUM)
15142 tmp = gen_rtx_SET (VOIDmode,
15143 gen_frame_mem (SImode, stack_pointer_rtx),
15145 RTX_FRAME_RELATED_P (tmp) = 1;
15146 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15154 for (j = 1, i++; j < num_regs; i++)
15156 if (mask & (1 << i))
15158 reg = gen_rtx_REG (SImode, i);
15160 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15162 if (i != PC_REGNUM)
15165 = gen_rtx_SET (VOIDmode,
15168 plus_constant (stack_pointer_rtx,
15171 RTX_FRAME_RELATED_P (tmp) = 1;
15172 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15179 par = emit_insn (par);
15181 tmp = gen_rtx_SET (VOIDmode,
15183 plus_constant (stack_pointer_rtx, -4 * num_regs));
15184 RTX_FRAME_RELATED_P (tmp) = 1;
15185 XVECEXP (dwarf, 0, 0) = tmp;
15187 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15192 /* Calculate the size of the return value that is passed in registers. */
15194 arm_size_return_regs (void)
15196 enum machine_mode mode;
15198 if (crtl->return_rtx != 0)
15199 mode = GET_MODE (crtl->return_rtx);
15201 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15203 return GET_MODE_SIZE (mode);
15207 emit_sfm (int base_reg, int count)
15214 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15215 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15217 reg = gen_rtx_REG (XFmode, base_reg++);
15219 XVECEXP (par, 0, 0)
15220 = gen_rtx_SET (VOIDmode,
15223 gen_rtx_PRE_MODIFY (Pmode,
15226 (stack_pointer_rtx,
15229 gen_rtx_UNSPEC (BLKmode,
15230 gen_rtvec (1, reg),
15231 UNSPEC_PUSH_MULT));
15232 tmp = gen_rtx_SET (VOIDmode,
15233 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15234 RTX_FRAME_RELATED_P (tmp) = 1;
15235 XVECEXP (dwarf, 0, 1) = tmp;
15237 for (i = 1; i < count; i++)
15239 reg = gen_rtx_REG (XFmode, base_reg++);
15240 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15242 tmp = gen_rtx_SET (VOIDmode,
15243 gen_frame_mem (XFmode,
15244 plus_constant (stack_pointer_rtx,
15247 RTX_FRAME_RELATED_P (tmp) = 1;
15248 XVECEXP (dwarf, 0, i + 1) = tmp;
15251 tmp = gen_rtx_SET (VOIDmode,
15253 plus_constant (stack_pointer_rtx, -12 * count));
15255 RTX_FRAME_RELATED_P (tmp) = 1;
15256 XVECEXP (dwarf, 0, 0) = tmp;
15258 par = emit_insn (par);
15259 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15265 /* Return true if the current function needs to save/restore LR. */
15268 thumb_force_lr_save (void)
15270 return !cfun->machine->lr_save_eliminated
15271 && (!leaf_function_p ()
15272 || thumb_far_jump_used_p ()
15273 || df_regs_ever_live_p (LR_REGNUM));
15277 /* Return true if r3 is used by any of the tail call insns in the
15278 current function. */
15281 any_sibcall_uses_r3 (void)
15286 if (!crtl->tail_call_emit)
15288 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15289 if (e->flags & EDGE_SIBCALL)
15291 rtx call = BB_END (e->src);
15292 if (!CALL_P (call))
15293 call = prev_nonnote_nondebug_insn (call);
15294 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15295 if (find_regno_fusage (call, USE, 3))
15302 /* Compute the distance from register FROM to register TO.
15303 These can be the arg pointer (26), the soft frame pointer (25),
15304 the stack pointer (13) or the hard frame pointer (11).
15305 In thumb mode r7 is used as the soft frame pointer, if needed.
15306 Typical stack layout looks like this:
15308 old stack pointer -> | |
15311 | | saved arguments for
15312 | | vararg functions
15315 hard FP & arg pointer -> | | \
15323 soft frame pointer -> | | /
15328 locals base pointer -> | | /
15333 current stack pointer -> | | /
15336 For a given function some or all of these stack components
15337 may not be needed, giving rise to the possibility of
15338 eliminating some of the registers.
15340 The values returned by this function must reflect the behavior
15341 of arm_expand_prologue() and arm_compute_save_reg_mask().
15343 The sign of the number returned reflects the direction of stack
15344 growth, so the values are positive for all eliminations except
15345 from the soft frame pointer to the hard frame pointer.
15347 SFP may point just inside the local variables block to ensure correct
15351 /* Calculate stack offsets. These are used to calculate register elimination
15352 offsets and in prologue/epilogue code. Also calculates which registers
15353 should be saved. */
15355 static arm_stack_offsets *
15356 arm_get_frame_offsets (void)
15358 struct arm_stack_offsets *offsets;
15359 unsigned long func_type;
15363 HOST_WIDE_INT frame_size;
15366 offsets = &cfun->machine->stack_offsets;
15368 /* We need to know if we are a leaf function. Unfortunately, it
15369 is possible to be called after start_sequence has been called,
15370 which causes get_insns to return the insns for the sequence,
15371 not the function, which will cause leaf_function_p to return
15372 the incorrect result.
15374 to know about leaf functions once reload has completed, and the
15375 frame size cannot be changed after that time, so we can safely
15376 use the cached value. */
15378 if (reload_completed)
15381 /* Initially this is the size of the local variables. It will translated
15382 into an offset once we have determined the size of preceding data. */
15383 frame_size = ROUND_UP_WORD (get_frame_size ());
15385 leaf = leaf_function_p ();
15387 /* Space for variadic functions. */
15388 offsets->saved_args = crtl->args.pretend_args_size;
15390 /* In Thumb mode this is incorrect, but never used. */
15391 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15392 arm_compute_static_chain_stack_bytes();
15396 unsigned int regno;
15398 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15399 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15400 saved = core_saved;
15402 /* We know that SP will be doubleword aligned on entry, and we must
15403 preserve that condition at any subroutine call. We also require the
15404 soft frame pointer to be doubleword aligned. */
15406 if (TARGET_REALLY_IWMMXT)
15408 /* Check for the call-saved iWMMXt registers. */
15409 for (regno = FIRST_IWMMXT_REGNUM;
15410 regno <= LAST_IWMMXT_REGNUM;
15412 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15416 func_type = arm_current_func_type ();
15417 if (! IS_VOLATILE (func_type))
15419 /* Space for saved FPA registers. */
15420 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15421 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15424 /* Space for saved VFP registers. */
15425 if (TARGET_HARD_FLOAT && TARGET_VFP)
15426 saved += arm_get_vfp_saved_size ();
15429 else /* TARGET_THUMB1 */
15431 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15432 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15433 saved = core_saved;
15434 if (TARGET_BACKTRACE)
15438 /* Saved registers include the stack frame. */
15439 offsets->saved_regs = offsets->saved_args + saved +
15440 arm_compute_static_chain_stack_bytes();
15441 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15442 /* A leaf function does not need any stack alignment if it has nothing
15444 if (leaf && frame_size == 0
15445 /* However if it calls alloca(), we have a dynamically allocated
15446 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15447 && ! cfun->calls_alloca)
15449 offsets->outgoing_args = offsets->soft_frame;
15450 offsets->locals_base = offsets->soft_frame;
15454 /* Ensure SFP has the correct alignment. */
15455 if (ARM_DOUBLEWORD_ALIGN
15456 && (offsets->soft_frame & 7))
15458 offsets->soft_frame += 4;
15459 /* Try to align stack by pushing an extra reg. Don't bother doing this
15460 when there is a stack frame as the alignment will be rolled into
15461 the normal stack adjustment. */
15462 if (frame_size + crtl->outgoing_args_size == 0)
15466 /* If it is safe to use r3, then do so. This sometimes
15467 generates better code on Thumb-2 by avoiding the need to
15468 use 32-bit push/pop instructions. */
15469 if (! any_sibcall_uses_r3 ()
15470 && arm_size_return_regs () <= 12
15471 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15476 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15478 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15487 offsets->saved_regs += 4;
15488 offsets->saved_regs_mask |= (1 << reg);
15493 offsets->locals_base = offsets->soft_frame + frame_size;
15494 offsets->outgoing_args = (offsets->locals_base
15495 + crtl->outgoing_args_size);
15497 if (ARM_DOUBLEWORD_ALIGN)
15499 /* Ensure SP remains doubleword aligned. */
15500 if (offsets->outgoing_args & 7)
15501 offsets->outgoing_args += 4;
15502 gcc_assert (!(offsets->outgoing_args & 7));
15509 /* Calculate the relative offsets for the different stack pointers. Positive
15510 offsets are in the direction of stack growth. */
15513 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15515 arm_stack_offsets *offsets;
15517 offsets = arm_get_frame_offsets ();
15519 /* OK, now we have enough information to compute the distances.
15520 There must be an entry in these switch tables for each pair
15521 of registers in ELIMINABLE_REGS, even if some of the entries
15522 seem to be redundant or useless. */
15525 case ARG_POINTER_REGNUM:
15528 case THUMB_HARD_FRAME_POINTER_REGNUM:
15531 case FRAME_POINTER_REGNUM:
15532 /* This is the reverse of the soft frame pointer
15533 to hard frame pointer elimination below. */
15534 return offsets->soft_frame - offsets->saved_args;
15536 case ARM_HARD_FRAME_POINTER_REGNUM:
15537 /* This is only non-zero in the case where the static chain register
15538 is stored above the frame. */
15539 return offsets->frame - offsets->saved_args - 4;
15541 case STACK_POINTER_REGNUM:
15542 /* If nothing has been pushed on the stack at all
15543 then this will return -4. This *is* correct! */
15544 return offsets->outgoing_args - (offsets->saved_args + 4);
15547 gcc_unreachable ();
15549 gcc_unreachable ();
15551 case FRAME_POINTER_REGNUM:
15554 case THUMB_HARD_FRAME_POINTER_REGNUM:
15557 case ARM_HARD_FRAME_POINTER_REGNUM:
15558 /* The hard frame pointer points to the top entry in the
15559 stack frame. The soft frame pointer to the bottom entry
15560 in the stack frame. If there is no stack frame at all,
15561 then they are identical. */
15563 return offsets->frame - offsets->soft_frame;
15565 case STACK_POINTER_REGNUM:
15566 return offsets->outgoing_args - offsets->soft_frame;
15569 gcc_unreachable ();
15571 gcc_unreachable ();
15574 /* You cannot eliminate from the stack pointer.
15575 In theory you could eliminate from the hard frame
15576 pointer to the stack pointer, but this will never
15577 happen, since if a stack frame is not needed the
15578 hard frame pointer will never be used. */
15579 gcc_unreachable ();
15583 /* Given FROM and TO register numbers, say whether this elimination is
15584 allowed. Frame pointer elimination is automatically handled.
15586 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15587 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15588 pointer, we must eliminate FRAME_POINTER_REGNUM into
15589 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15590 ARG_POINTER_REGNUM. */
15593 arm_can_eliminate (const int from, const int to)
15595 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15596 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15597 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15598 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15602 /* Emit RTL to save coprocessor registers on function entry. Returns the
15603 number of bytes pushed. */
15606 arm_save_coproc_regs(void)
15608 int saved_size = 0;
15610 unsigned start_reg;
15613 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15614 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15616 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15617 insn = gen_rtx_MEM (V2SImode, insn);
15618 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15619 RTX_FRAME_RELATED_P (insn) = 1;
15623 /* Save any floating point call-saved registers used by this
15625 if (TARGET_FPA_EMU2)
15627 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15628 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15630 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15631 insn = gen_rtx_MEM (XFmode, insn);
15632 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15633 RTX_FRAME_RELATED_P (insn) = 1;
15639 start_reg = LAST_FPA_REGNUM;
15641 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15643 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15645 if (start_reg - reg == 3)
15647 insn = emit_sfm (reg, 4);
15648 RTX_FRAME_RELATED_P (insn) = 1;
15650 start_reg = reg - 1;
15655 if (start_reg != reg)
15657 insn = emit_sfm (reg + 1, start_reg - reg);
15658 RTX_FRAME_RELATED_P (insn) = 1;
15659 saved_size += (start_reg - reg) * 12;
15661 start_reg = reg - 1;
15665 if (start_reg != reg)
15667 insn = emit_sfm (reg + 1, start_reg - reg);
15668 saved_size += (start_reg - reg) * 12;
15669 RTX_FRAME_RELATED_P (insn) = 1;
15672 if (TARGET_HARD_FLOAT && TARGET_VFP)
15674 start_reg = FIRST_VFP_REGNUM;
15676 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15678 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15679 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15681 if (start_reg != reg)
15682 saved_size += vfp_emit_fstmd (start_reg,
15683 (reg - start_reg) / 2);
15684 start_reg = reg + 2;
15687 if (start_reg != reg)
15688 saved_size += vfp_emit_fstmd (start_reg,
15689 (reg - start_reg) / 2);
15695 /* Set the Thumb frame pointer from the stack pointer. */
15698 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15700 HOST_WIDE_INT amount;
15703 amount = offsets->outgoing_args - offsets->locals_base;
15705 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15706 stack_pointer_rtx, GEN_INT (amount)));
15709 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15710 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15711 expects the first two operands to be the same. */
15714 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15716 hard_frame_pointer_rtx));
15720 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15721 hard_frame_pointer_rtx,
15722 stack_pointer_rtx));
15724 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15725 plus_constant (stack_pointer_rtx, amount));
15726 RTX_FRAME_RELATED_P (dwarf) = 1;
15727 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15730 RTX_FRAME_RELATED_P (insn) = 1;
15733 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15736 arm_expand_prologue (void)
15741 unsigned long live_regs_mask;
15742 unsigned long func_type;
15744 int saved_pretend_args = 0;
15745 int saved_regs = 0;
15746 unsigned HOST_WIDE_INT args_to_push;
15747 arm_stack_offsets *offsets;
15749 func_type = arm_current_func_type ();
15751 /* Naked functions don't have prologues. */
15752 if (IS_NAKED (func_type))
15755 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15756 args_to_push = crtl->args.pretend_args_size;
15758 /* Compute which register we will have to save onto the stack. */
15759 offsets = arm_get_frame_offsets ();
15760 live_regs_mask = offsets->saved_regs_mask;
15762 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15764 if (IS_STACKALIGN (func_type))
15769 /* Handle a word-aligned stack pointer. We generate the following:
15774 <save and restore r0 in normal prologue/epilogue>
15778 The unwinder doesn't need to know about the stack realignment.
15779 Just tell it we saved SP in r0. */
15780 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15782 r0 = gen_rtx_REG (SImode, 0);
15783 r1 = gen_rtx_REG (SImode, 1);
15784 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15785 compiler won't choke. */
15786 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15787 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15788 insn = gen_movsi (r0, stack_pointer_rtx);
15789 RTX_FRAME_RELATED_P (insn) = 1;
15790 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15792 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15793 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15796 /* For APCS frames, if IP register is clobbered
15797 when creating frame, save that register in a special
15799 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15801 if (IS_INTERRUPT (func_type))
15803 /* Interrupt functions must not corrupt any registers.
15804 Creating a frame pointer however, corrupts the IP
15805 register, so we must push it first. */
15806 insn = emit_multi_reg_push (1 << IP_REGNUM);
15808 /* Do not set RTX_FRAME_RELATED_P on this insn.
15809 The dwarf stack unwinding code only wants to see one
15810 stack decrement per function, and this is not it. If
15811 this instruction is labeled as being part of the frame
15812 creation sequence then dwarf2out_frame_debug_expr will
15813 die when it encounters the assignment of IP to FP
15814 later on, since the use of SP here establishes SP as
15815 the CFA register and not IP.
15817 Anyway this instruction is not really part of the stack
15818 frame creation although it is part of the prologue. */
15820 else if (IS_NESTED (func_type))
15822 /* The Static chain register is the same as the IP register
15823 used as a scratch register during stack frame creation.
15824 To get around this need to find somewhere to store IP
15825 whilst the frame is being created. We try the following
15828 1. The last argument register.
15829 2. A slot on the stack above the frame. (This only
15830 works if the function is not a varargs function).
15831 3. Register r3, after pushing the argument registers
15834 Note - we only need to tell the dwarf2 backend about the SP
15835 adjustment in the second variant; the static chain register
15836 doesn't need to be unwound, as it doesn't contain a value
15837 inherited from the caller. */
15839 if (df_regs_ever_live_p (3) == false)
15840 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15841 else if (args_to_push == 0)
15845 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15848 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15849 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15852 /* Just tell the dwarf backend that we adjusted SP. */
15853 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15854 plus_constant (stack_pointer_rtx,
15856 RTX_FRAME_RELATED_P (insn) = 1;
15857 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15861 /* Store the args on the stack. */
15862 if (cfun->machine->uses_anonymous_args)
15863 insn = emit_multi_reg_push
15864 ((0xf0 >> (args_to_push / 4)) & 0xf);
15867 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15868 GEN_INT (- args_to_push)));
15870 RTX_FRAME_RELATED_P (insn) = 1;
15872 saved_pretend_args = 1;
15873 fp_offset = args_to_push;
15876 /* Now reuse r3 to preserve IP. */
15877 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15881 insn = emit_set_insn (ip_rtx,
15882 plus_constant (stack_pointer_rtx, fp_offset));
15883 RTX_FRAME_RELATED_P (insn) = 1;
15888 /* Push the argument registers, or reserve space for them. */
15889 if (cfun->machine->uses_anonymous_args)
15890 insn = emit_multi_reg_push
15891 ((0xf0 >> (args_to_push / 4)) & 0xf);
15894 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15895 GEN_INT (- args_to_push)));
15896 RTX_FRAME_RELATED_P (insn) = 1;
15899 /* If this is an interrupt service routine, and the link register
15900 is going to be pushed, and we're not generating extra
15901 push of IP (needed when frame is needed and frame layout if apcs),
15902 subtracting four from LR now will mean that the function return
15903 can be done with a single instruction. */
15904 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15905 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15906 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15909 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15911 emit_set_insn (lr, plus_constant (lr, -4));
15914 if (live_regs_mask)
15916 saved_regs += bit_count (live_regs_mask) * 4;
15917 if (optimize_size && !frame_pointer_needed
15918 && saved_regs == offsets->saved_regs - offsets->saved_args)
15920 /* If no coprocessor registers are being pushed and we don't have
15921 to worry about a frame pointer then push extra registers to
15922 create the stack frame. This is done is a way that does not
15923 alter the frame layout, so is independent of the epilogue. */
15927 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15929 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15930 if (frame && n * 4 >= frame)
15933 live_regs_mask |= (1 << n) - 1;
15934 saved_regs += frame;
15937 insn = emit_multi_reg_push (live_regs_mask);
15938 RTX_FRAME_RELATED_P (insn) = 1;
15941 if (! IS_VOLATILE (func_type))
15942 saved_regs += arm_save_coproc_regs ();
15944 if (frame_pointer_needed && TARGET_ARM)
15946 /* Create the new frame pointer. */
15947 if (TARGET_APCS_FRAME)
15949 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15950 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15951 RTX_FRAME_RELATED_P (insn) = 1;
15953 if (IS_NESTED (func_type))
15955 /* Recover the static chain register. */
15956 if (!df_regs_ever_live_p (3)
15957 || saved_pretend_args)
15958 insn = gen_rtx_REG (SImode, 3);
15959 else /* if (crtl->args.pretend_args_size == 0) */
15961 insn = plus_constant (hard_frame_pointer_rtx, 4);
15962 insn = gen_frame_mem (SImode, insn);
15964 emit_set_insn (ip_rtx, insn);
15965 /* Add a USE to stop propagate_one_insn() from barfing. */
15966 emit_insn (gen_prologue_use (ip_rtx));
15971 insn = GEN_INT (saved_regs - 4);
15972 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15973 stack_pointer_rtx, insn));
15974 RTX_FRAME_RELATED_P (insn) = 1;
15978 if (flag_stack_usage)
15979 current_function_static_stack_size
15980 = offsets->outgoing_args - offsets->saved_args;
15982 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15984 /* This add can produce multiple insns for a large constant, so we
15985 need to get tricky. */
15986 rtx last = get_last_insn ();
15988 amount = GEN_INT (offsets->saved_args + saved_regs
15989 - offsets->outgoing_args);
15991 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15995 last = last ? NEXT_INSN (last) : get_insns ();
15996 RTX_FRAME_RELATED_P (last) = 1;
15998 while (last != insn);
16000 /* If the frame pointer is needed, emit a special barrier that
16001 will prevent the scheduler from moving stores to the frame
16002 before the stack adjustment. */
16003 if (frame_pointer_needed)
16004 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16005 hard_frame_pointer_rtx));
16009 if (frame_pointer_needed && TARGET_THUMB2)
16010 thumb_set_frame_pointer (offsets);
16012 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16014 unsigned long mask;
16016 mask = live_regs_mask;
16017 mask &= THUMB2_WORK_REGS;
16018 if (!IS_NESTED (func_type))
16019 mask |= (1 << IP_REGNUM);
16020 arm_load_pic_register (mask);
16023 /* If we are profiling, make sure no instructions are scheduled before
16024 the call to mcount. Similarly if the user has requested no
16025 scheduling in the prolog. Similarly if we want non-call exceptions
16026 using the EABI unwinder, to prevent faulting instructions from being
16027 swapped with a stack adjustment. */
16028 if (crtl->profile || !TARGET_SCHED_PROLOG
16029 || (arm_except_unwind_info (&global_options) == UI_TARGET
16030 && cfun->can_throw_non_call_exceptions))
16031 emit_insn (gen_blockage ());
16033 /* If the link register is being kept alive, with the return address in it,
16034 then make sure that it does not get reused by the ce2 pass. */
16035 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16036 cfun->machine->lr_save_eliminated = 1;
16039 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16041 arm_print_condition (FILE *stream)
16043 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16045 /* Branch conversion is not implemented for Thumb-2. */
16048 output_operand_lossage ("predicated Thumb instruction");
16051 if (current_insn_predicate != NULL)
16053 output_operand_lossage
16054 ("predicated instruction in conditional sequence");
16058 fputs (arm_condition_codes[arm_current_cc], stream);
16060 else if (current_insn_predicate)
16062 enum arm_cond_code code;
16066 output_operand_lossage ("predicated Thumb instruction");
16070 code = get_arm_condition_code (current_insn_predicate);
16071 fputs (arm_condition_codes[code], stream);
16076 /* If CODE is 'd', then the X is a condition operand and the instruction
16077 should only be executed if the condition is true.
16078 if CODE is 'D', then the X is a condition operand and the instruction
16079 should only be executed if the condition is false: however, if the mode
16080 of the comparison is CCFPEmode, then always execute the instruction -- we
16081 do this because in these circumstances !GE does not necessarily imply LT;
16082 in these cases the instruction pattern will take care to make sure that
16083 an instruction containing %d will follow, thereby undoing the effects of
16084 doing this instruction unconditionally.
16085 If CODE is 'N' then X is a floating point operand that must be negated
16087 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16088 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16090 arm_print_operand (FILE *stream, rtx x, int code)
16095 fputs (ASM_COMMENT_START, stream);
16099 fputs (user_label_prefix, stream);
16103 fputs (REGISTER_PREFIX, stream);
16107 arm_print_condition (stream);
16111 /* Nothing in unified syntax, otherwise the current condition code. */
16112 if (!TARGET_UNIFIED_ASM)
16113 arm_print_condition (stream);
16117 /* The current condition code in unified syntax, otherwise nothing. */
16118 if (TARGET_UNIFIED_ASM)
16119 arm_print_condition (stream);
16123 /* The current condition code for a condition code setting instruction.
16124 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16125 if (TARGET_UNIFIED_ASM)
16127 fputc('s', stream);
16128 arm_print_condition (stream);
16132 arm_print_condition (stream);
16133 fputc('s', stream);
16138 /* If the instruction is conditionally executed then print
16139 the current condition code, otherwise print 's'. */
16140 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16141 if (current_insn_predicate)
16142 arm_print_condition (stream);
16144 fputc('s', stream);
16147 /* %# is a "break" sequence. It doesn't output anything, but is used to
16148 separate e.g. operand numbers from following text, if that text consists
16149 of further digits which we don't want to be part of the operand
16157 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16158 r = real_value_negate (&r);
16159 fprintf (stream, "%s", fp_const_from_val (&r));
16163 /* An integer or symbol address without a preceding # sign. */
16165 switch (GET_CODE (x))
16168 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16172 output_addr_const (stream, x);
16176 gcc_unreachable ();
16181 if (GET_CODE (x) == CONST_INT)
16184 val = ARM_SIGN_EXTEND (~INTVAL (x));
16185 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16189 putc ('~', stream);
16190 output_addr_const (stream, x);
16195 /* The low 16 bits of an immediate constant. */
16196 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16200 fprintf (stream, "%s", arithmetic_instr (x, 1));
16203 /* Truncate Cirrus shift counts. */
16205 if (GET_CODE (x) == CONST_INT)
16207 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16210 arm_print_operand (stream, x, 0);
16214 fprintf (stream, "%s", arithmetic_instr (x, 0));
16222 if (!shift_operator (x, SImode))
16224 output_operand_lossage ("invalid shift operand");
16228 shift = shift_op (x, &val);
16232 fprintf (stream, ", %s ", shift);
16234 arm_print_operand (stream, XEXP (x, 1), 0);
16236 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16241 /* An explanation of the 'Q', 'R' and 'H' register operands:
16243 In a pair of registers containing a DI or DF value the 'Q'
16244 operand returns the register number of the register containing
16245 the least significant part of the value. The 'R' operand returns
16246 the register number of the register containing the most
16247 significant part of the value.
16249 The 'H' operand returns the higher of the two register numbers.
16250 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16251 same as the 'Q' operand, since the most significant part of the
16252 value is held in the lower number register. The reverse is true
16253 on systems where WORDS_BIG_ENDIAN is false.
16255 The purpose of these operands is to distinguish between cases
16256 where the endian-ness of the values is important (for example
16257 when they are added together), and cases where the endian-ness
16258 is irrelevant, but the order of register operations is important.
16259 For example when loading a value from memory into a register
16260 pair, the endian-ness does not matter. Provided that the value
16261 from the lower memory address is put into the lower numbered
16262 register, and the value from the higher address is put into the
16263 higher numbered register, the load will work regardless of whether
16264 the value being loaded is big-wordian or little-wordian. The
16265 order of the two register loads can matter however, if the address
16266 of the memory location is actually held in one of the registers
16267 being overwritten by the load.
16269 The 'Q' and 'R' constraints are also available for 64-bit
16272 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16274 rtx part = gen_lowpart (SImode, x);
16275 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16279 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16281 output_operand_lossage ("invalid operand for code '%c'", code);
16285 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16289 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16291 enum machine_mode mode = GET_MODE (x);
16294 if (mode == VOIDmode)
16296 part = gen_highpart_mode (SImode, mode, x);
16297 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16301 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16303 output_operand_lossage ("invalid operand for code '%c'", code);
16307 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16311 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16313 output_operand_lossage ("invalid operand for code '%c'", code);
16317 asm_fprintf (stream, "%r", REGNO (x) + 1);
16321 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16323 output_operand_lossage ("invalid operand for code '%c'", code);
16327 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16331 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16333 output_operand_lossage ("invalid operand for code '%c'", code);
16337 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16341 asm_fprintf (stream, "%r",
16342 GET_CODE (XEXP (x, 0)) == REG
16343 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16347 asm_fprintf (stream, "{%r-%r}",
16349 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16352 /* Like 'M', but writing doubleword vector registers, for use by Neon
16356 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16357 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16359 asm_fprintf (stream, "{d%d}", regno);
16361 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16366 /* CONST_TRUE_RTX means always -- that's the default. */
16367 if (x == const_true_rtx)
16370 if (!COMPARISON_P (x))
16372 output_operand_lossage ("invalid operand for code '%c'", code);
16376 fputs (arm_condition_codes[get_arm_condition_code (x)],
16381 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16382 want to do that. */
16383 if (x == const_true_rtx)
16385 output_operand_lossage ("instruction never executed");
16388 if (!COMPARISON_P (x))
16390 output_operand_lossage ("invalid operand for code '%c'", code);
16394 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16395 (get_arm_condition_code (x))],
16399 /* Cirrus registers can be accessed in a variety of ways:
16400 single floating point (f)
16401 double floating point (d)
16403 64bit integer (dx). */
16404 case 'W': /* Cirrus register in F mode. */
16405 case 'X': /* Cirrus register in D mode. */
16406 case 'Y': /* Cirrus register in FX mode. */
16407 case 'Z': /* Cirrus register in DX mode. */
16408 gcc_assert (GET_CODE (x) == REG
16409 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16411 fprintf (stream, "mv%s%s",
16413 : code == 'X' ? "d"
16414 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16418 /* Print cirrus register in the mode specified by the register's mode. */
16421 int mode = GET_MODE (x);
16423 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16425 output_operand_lossage ("invalid operand for code '%c'", code);
16429 fprintf (stream, "mv%s%s",
16430 mode == DFmode ? "d"
16431 : mode == SImode ? "fx"
16432 : mode == DImode ? "dx"
16433 : "f", reg_names[REGNO (x)] + 2);
16439 if (GET_CODE (x) != REG
16440 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16441 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16442 /* Bad value for wCG register number. */
16444 output_operand_lossage ("invalid operand for code '%c'", code);
16449 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16452 /* Print an iWMMXt control register name. */
16454 if (GET_CODE (x) != CONST_INT
16456 || INTVAL (x) >= 16)
16457 /* Bad value for wC register number. */
16459 output_operand_lossage ("invalid operand for code '%c'", code);
16465 static const char * wc_reg_names [16] =
16467 "wCID", "wCon", "wCSSF", "wCASF",
16468 "wC4", "wC5", "wC6", "wC7",
16469 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16470 "wC12", "wC13", "wC14", "wC15"
16473 fprintf (stream, wc_reg_names [INTVAL (x)]);
16477 /* Print the high single-precision register of a VFP double-precision
16481 int mode = GET_MODE (x);
16484 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16486 output_operand_lossage ("invalid operand for code '%c'", code);
16491 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16493 output_operand_lossage ("invalid operand for code '%c'", code);
16497 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16501 /* Print a VFP/Neon double precision or quad precision register name. */
16505 int mode = GET_MODE (x);
16506 int is_quad = (code == 'q');
16509 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16511 output_operand_lossage ("invalid operand for code '%c'", code);
16515 if (GET_CODE (x) != REG
16516 || !IS_VFP_REGNUM (REGNO (x)))
16518 output_operand_lossage ("invalid operand for code '%c'", code);
16523 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16524 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16526 output_operand_lossage ("invalid operand for code '%c'", code);
16530 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16531 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16535 /* These two codes print the low/high doubleword register of a Neon quad
16536 register, respectively. For pair-structure types, can also print
16537 low/high quadword registers. */
16541 int mode = GET_MODE (x);
16544 if ((GET_MODE_SIZE (mode) != 16
16545 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16547 output_operand_lossage ("invalid operand for code '%c'", code);
16552 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16554 output_operand_lossage ("invalid operand for code '%c'", code);
16558 if (GET_MODE_SIZE (mode) == 16)
16559 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16560 + (code == 'f' ? 1 : 0));
16562 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16563 + (code == 'f' ? 1 : 0));
16567 /* Print a VFPv3 floating-point constant, represented as an integer
16571 int index = vfp3_const_double_index (x);
16572 gcc_assert (index != -1);
16573 fprintf (stream, "%d", index);
16577 /* Print bits representing opcode features for Neon.
16579 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16580 and polynomials as unsigned.
16582 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16584 Bit 2 is 1 for rounding functions, 0 otherwise. */
16586 /* Identify the type as 's', 'u', 'p' or 'f'. */
16589 HOST_WIDE_INT bits = INTVAL (x);
16590 fputc ("uspf"[bits & 3], stream);
16594 /* Likewise, but signed and unsigned integers are both 'i'. */
16597 HOST_WIDE_INT bits = INTVAL (x);
16598 fputc ("iipf"[bits & 3], stream);
16602 /* As for 'T', but emit 'u' instead of 'p'. */
16605 HOST_WIDE_INT bits = INTVAL (x);
16606 fputc ("usuf"[bits & 3], stream);
16610 /* Bit 2: rounding (vs none). */
16613 HOST_WIDE_INT bits = INTVAL (x);
16614 fputs ((bits & 4) != 0 ? "r" : "", stream);
16618 /* Memory operand for vld1/vst1 instruction. */
16622 bool postinc = FALSE;
16623 unsigned align, modesize, align_bits;
16625 gcc_assert (GET_CODE (x) == MEM);
16626 addr = XEXP (x, 0);
16627 if (GET_CODE (addr) == POST_INC)
16630 addr = XEXP (addr, 0);
16632 asm_fprintf (stream, "[%r", REGNO (addr));
16634 /* We know the alignment of this access, so we can emit a hint in the
16635 instruction (for some alignments) as an aid to the memory subsystem
16637 align = MEM_ALIGN (x) >> 3;
16638 modesize = GET_MODE_SIZE (GET_MODE (x));
16640 /* Only certain alignment specifiers are supported by the hardware. */
16641 if (modesize == 16 && (align % 32) == 0)
16643 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16645 else if ((align % 8) == 0)
16650 if (align_bits != 0)
16651 asm_fprintf (stream, ":%d", align_bits);
16653 asm_fprintf (stream, "]");
16656 fputs("!", stream);
16664 gcc_assert (GET_CODE (x) == MEM);
16665 addr = XEXP (x, 0);
16666 gcc_assert (GET_CODE (addr) == REG);
16667 asm_fprintf (stream, "[%r]", REGNO (addr));
16671 /* Translate an S register number into a D register number and element index. */
16674 int mode = GET_MODE (x);
16677 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16679 output_operand_lossage ("invalid operand for code '%c'", code);
16684 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16686 output_operand_lossage ("invalid operand for code '%c'", code);
16690 regno = regno - FIRST_VFP_REGNUM;
16691 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16695 /* Register specifier for vld1.16/vst1.16. Translate the S register
16696 number into a D register number and element index. */
16699 int mode = GET_MODE (x);
16702 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16704 output_operand_lossage ("invalid operand for code '%c'", code);
16709 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16711 output_operand_lossage ("invalid operand for code '%c'", code);
16715 regno = regno - FIRST_VFP_REGNUM;
16716 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16723 output_operand_lossage ("missing operand");
16727 switch (GET_CODE (x))
16730 asm_fprintf (stream, "%r", REGNO (x));
16734 output_memory_reference_mode = GET_MODE (x);
16735 output_address (XEXP (x, 0));
16742 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16743 sizeof (fpstr), 0, 1);
16744 fprintf (stream, "#%s", fpstr);
16747 fprintf (stream, "#%s", fp_immediate_constant (x));
16751 gcc_assert (GET_CODE (x) != NEG);
16752 fputc ('#', stream);
16753 if (GET_CODE (x) == HIGH)
16755 fputs (":lower16:", stream);
16759 output_addr_const (stream, x);
16765 /* Target hook for printing a memory address. */
16767 arm_print_operand_address (FILE *stream, rtx x)
16771 int is_minus = GET_CODE (x) == MINUS;
16773 if (GET_CODE (x) == REG)
16774 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16775 else if (GET_CODE (x) == PLUS || is_minus)
16777 rtx base = XEXP (x, 0);
16778 rtx index = XEXP (x, 1);
16779 HOST_WIDE_INT offset = 0;
16780 if (GET_CODE (base) != REG
16781 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16783 /* Ensure that BASE is a register. */
16784 /* (one of them must be). */
16785 /* Also ensure the SP is not used as in index register. */
16790 switch (GET_CODE (index))
16793 offset = INTVAL (index);
16796 asm_fprintf (stream, "[%r, #%wd]",
16797 REGNO (base), offset);
16801 asm_fprintf (stream, "[%r, %s%r]",
16802 REGNO (base), is_minus ? "-" : "",
16812 asm_fprintf (stream, "[%r, %s%r",
16813 REGNO (base), is_minus ? "-" : "",
16814 REGNO (XEXP (index, 0)));
16815 arm_print_operand (stream, index, 'S');
16816 fputs ("]", stream);
16821 gcc_unreachable ();
16824 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16825 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16827 extern enum machine_mode output_memory_reference_mode;
16829 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16831 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16832 asm_fprintf (stream, "[%r, #%s%d]!",
16833 REGNO (XEXP (x, 0)),
16834 GET_CODE (x) == PRE_DEC ? "-" : "",
16835 GET_MODE_SIZE (output_memory_reference_mode));
16837 asm_fprintf (stream, "[%r], #%s%d",
16838 REGNO (XEXP (x, 0)),
16839 GET_CODE (x) == POST_DEC ? "-" : "",
16840 GET_MODE_SIZE (output_memory_reference_mode));
16842 else if (GET_CODE (x) == PRE_MODIFY)
16844 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16845 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16846 asm_fprintf (stream, "#%wd]!",
16847 INTVAL (XEXP (XEXP (x, 1), 1)));
16849 asm_fprintf (stream, "%r]!",
16850 REGNO (XEXP (XEXP (x, 1), 1)));
16852 else if (GET_CODE (x) == POST_MODIFY)
16854 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16855 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16856 asm_fprintf (stream, "#%wd",
16857 INTVAL (XEXP (XEXP (x, 1), 1)));
16859 asm_fprintf (stream, "%r",
16860 REGNO (XEXP (XEXP (x, 1), 1)));
16862 else output_addr_const (stream, x);
16866 if (GET_CODE (x) == REG)
16867 asm_fprintf (stream, "[%r]", REGNO (x));
16868 else if (GET_CODE (x) == POST_INC)
16869 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16870 else if (GET_CODE (x) == PLUS)
16872 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16873 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16874 asm_fprintf (stream, "[%r, #%wd]",
16875 REGNO (XEXP (x, 0)),
16876 INTVAL (XEXP (x, 1)));
16878 asm_fprintf (stream, "[%r, %r]",
16879 REGNO (XEXP (x, 0)),
16880 REGNO (XEXP (x, 1)));
16883 output_addr_const (stream, x);
16887 /* Target hook for indicating whether a punctuation character for
16888 TARGET_PRINT_OPERAND is valid. */
16890 arm_print_operand_punct_valid_p (unsigned char code)
16892 return (code == '@' || code == '|' || code == '.'
16893 || code == '(' || code == ')' || code == '#'
16894 || (TARGET_32BIT && (code == '?'))
16895 || (TARGET_THUMB2 && (code == '!'))
16896 || (TARGET_THUMB && (code == '_')));
16899 /* Target hook for assembling integer objects. The ARM version needs to
16900 handle word-sized values specially. */
16902 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16904 enum machine_mode mode;
16906 if (size == UNITS_PER_WORD && aligned_p)
16908 fputs ("\t.word\t", asm_out_file);
16909 output_addr_const (asm_out_file, x);
16911 /* Mark symbols as position independent. We only do this in the
16912 .text segment, not in the .data segment. */
16913 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16914 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16916 /* See legitimize_pic_address for an explanation of the
16917 TARGET_VXWORKS_RTP check. */
16918 if (TARGET_VXWORKS_RTP
16919 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16920 fputs ("(GOT)", asm_out_file);
16922 fputs ("(GOTOFF)", asm_out_file);
16924 fputc ('\n', asm_out_file);
16928 mode = GET_MODE (x);
16930 if (arm_vector_mode_supported_p (mode))
16934 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16936 units = CONST_VECTOR_NUNITS (x);
16937 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16939 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16940 for (i = 0; i < units; i++)
16942 rtx elt = CONST_VECTOR_ELT (x, i);
16944 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16947 for (i = 0; i < units; i++)
16949 rtx elt = CONST_VECTOR_ELT (x, i);
16950 REAL_VALUE_TYPE rval;
16952 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16955 (rval, GET_MODE_INNER (mode),
16956 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16962 return default_assemble_integer (x, size, aligned_p);
16966 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16970 if (!TARGET_AAPCS_BASED)
16973 default_named_section_asm_out_constructor
16974 : default_named_section_asm_out_destructor) (symbol, priority);
16978 /* Put these in the .init_array section, using a special relocation. */
16979 if (priority != DEFAULT_INIT_PRIORITY)
16982 sprintf (buf, "%s.%.5u",
16983 is_ctor ? ".init_array" : ".fini_array",
16985 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16992 switch_to_section (s);
16993 assemble_align (POINTER_SIZE);
16994 fputs ("\t.word\t", asm_out_file);
16995 output_addr_const (asm_out_file, symbol);
16996 fputs ("(target1)\n", asm_out_file);
16999 /* Add a function to the list of static constructors. */
17002 arm_elf_asm_constructor (rtx symbol, int priority)
17004 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17007 /* Add a function to the list of static destructors. */
17010 arm_elf_asm_destructor (rtx symbol, int priority)
17012 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17015 /* A finite state machine takes care of noticing whether or not instructions
17016 can be conditionally executed, and thus decrease execution time and code
17017 size by deleting branch instructions. The fsm is controlled by
17018 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17020 /* The state of the fsm controlling condition codes are:
17021 0: normal, do nothing special
17022 1: make ASM_OUTPUT_OPCODE not output this instruction
17023 2: make ASM_OUTPUT_OPCODE not output this instruction
17024 3: make instructions conditional
17025 4: make instructions conditional
17027 State transitions (state->state by whom under condition):
17028 0 -> 1 final_prescan_insn if the `target' is a label
17029 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17030 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17031 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17032 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17033 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17034 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17035 (the target insn is arm_target_insn).
17037 If the jump clobbers the conditions then we use states 2 and 4.
17039 A similar thing can be done with conditional return insns.
17041 XXX In case the `target' is an unconditional branch, this conditionalising
17042 of the instructions always reduces code size, but not always execution
17043 time. But then, I want to reduce the code size to somewhere near what
17044 /bin/cc produces. */
17046 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17047 instructions. When a COND_EXEC instruction is seen the subsequent
17048 instructions are scanned so that multiple conditional instructions can be
17049 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17050 specify the length and true/false mask for the IT block. These will be
17051 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
17053 /* Returns the index of the ARM condition code string in
17054 `arm_condition_codes'. COMPARISON should be an rtx like
17055 `(eq (...) (...))'. */
17056 static enum arm_cond_code
17057 get_arm_condition_code (rtx comparison)
17059 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17060 enum arm_cond_code code;
17061 enum rtx_code comp_code = GET_CODE (comparison);
17063 if (GET_MODE_CLASS (mode) != MODE_CC)
17064 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17065 XEXP (comparison, 1));
17069 case CC_DNEmode: code = ARM_NE; goto dominance;
17070 case CC_DEQmode: code = ARM_EQ; goto dominance;
17071 case CC_DGEmode: code = ARM_GE; goto dominance;
17072 case CC_DGTmode: code = ARM_GT; goto dominance;
17073 case CC_DLEmode: code = ARM_LE; goto dominance;
17074 case CC_DLTmode: code = ARM_LT; goto dominance;
17075 case CC_DGEUmode: code = ARM_CS; goto dominance;
17076 case CC_DGTUmode: code = ARM_HI; goto dominance;
17077 case CC_DLEUmode: code = ARM_LS; goto dominance;
17078 case CC_DLTUmode: code = ARM_CC;
17081 gcc_assert (comp_code == EQ || comp_code == NE);
17083 if (comp_code == EQ)
17084 return ARM_INVERSE_CONDITION_CODE (code);
17090 case NE: return ARM_NE;
17091 case EQ: return ARM_EQ;
17092 case GE: return ARM_PL;
17093 case LT: return ARM_MI;
17094 default: gcc_unreachable ();
17100 case NE: return ARM_NE;
17101 case EQ: return ARM_EQ;
17102 default: gcc_unreachable ();
17108 case NE: return ARM_MI;
17109 case EQ: return ARM_PL;
17110 default: gcc_unreachable ();
17115 /* These encodings assume that AC=1 in the FPA system control
17116 byte. This allows us to handle all cases except UNEQ and
17120 case GE: return ARM_GE;
17121 case GT: return ARM_GT;
17122 case LE: return ARM_LS;
17123 case LT: return ARM_MI;
17124 case NE: return ARM_NE;
17125 case EQ: return ARM_EQ;
17126 case ORDERED: return ARM_VC;
17127 case UNORDERED: return ARM_VS;
17128 case UNLT: return ARM_LT;
17129 case UNLE: return ARM_LE;
17130 case UNGT: return ARM_HI;
17131 case UNGE: return ARM_PL;
17132 /* UNEQ and LTGT do not have a representation. */
17133 case UNEQ: /* Fall through. */
17134 case LTGT: /* Fall through. */
17135 default: gcc_unreachable ();
17141 case NE: return ARM_NE;
17142 case EQ: return ARM_EQ;
17143 case GE: return ARM_LE;
17144 case GT: return ARM_LT;
17145 case LE: return ARM_GE;
17146 case LT: return ARM_GT;
17147 case GEU: return ARM_LS;
17148 case GTU: return ARM_CC;
17149 case LEU: return ARM_CS;
17150 case LTU: return ARM_HI;
17151 default: gcc_unreachable ();
17157 case LTU: return ARM_CS;
17158 case GEU: return ARM_CC;
17159 default: gcc_unreachable ();
17165 case NE: return ARM_NE;
17166 case EQ: return ARM_EQ;
17167 case GEU: return ARM_CS;
17168 case GTU: return ARM_HI;
17169 case LEU: return ARM_LS;
17170 case LTU: return ARM_CC;
17171 default: gcc_unreachable ();
17177 case GE: return ARM_GE;
17178 case LT: return ARM_LT;
17179 case GEU: return ARM_CS;
17180 case LTU: return ARM_CC;
17181 default: gcc_unreachable ();
17187 case NE: return ARM_NE;
17188 case EQ: return ARM_EQ;
17189 case GE: return ARM_GE;
17190 case GT: return ARM_GT;
17191 case LE: return ARM_LE;
17192 case LT: return ARM_LT;
17193 case GEU: return ARM_CS;
17194 case GTU: return ARM_HI;
17195 case LEU: return ARM_LS;
17196 case LTU: return ARM_CC;
17197 default: gcc_unreachable ();
17200 default: gcc_unreachable ();
17204 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17207 thumb2_final_prescan_insn (rtx insn)
17209 rtx first_insn = insn;
17210 rtx body = PATTERN (insn);
17212 enum arm_cond_code code;
17216 /* Remove the previous insn from the count of insns to be output. */
17217 if (arm_condexec_count)
17218 arm_condexec_count--;
17220 /* Nothing to do if we are already inside a conditional block. */
17221 if (arm_condexec_count)
17224 if (GET_CODE (body) != COND_EXEC)
17227 /* Conditional jumps are implemented directly. */
17228 if (GET_CODE (insn) == JUMP_INSN)
17231 predicate = COND_EXEC_TEST (body);
17232 arm_current_cc = get_arm_condition_code (predicate);
17234 n = get_attr_ce_count (insn);
17235 arm_condexec_count = 1;
17236 arm_condexec_mask = (1 << n) - 1;
17237 arm_condexec_masklen = n;
17238 /* See if subsequent instructions can be combined into the same block. */
17241 insn = next_nonnote_insn (insn);
17243 /* Jumping into the middle of an IT block is illegal, so a label or
17244 barrier terminates the block. */
17245 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17248 body = PATTERN (insn);
17249 /* USE and CLOBBER aren't really insns, so just skip them. */
17250 if (GET_CODE (body) == USE
17251 || GET_CODE (body) == CLOBBER)
17254 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17255 if (GET_CODE (body) != COND_EXEC)
17257 /* Allow up to 4 conditionally executed instructions in a block. */
17258 n = get_attr_ce_count (insn);
17259 if (arm_condexec_masklen + n > 4)
17262 predicate = COND_EXEC_TEST (body);
17263 code = get_arm_condition_code (predicate);
17264 mask = (1 << n) - 1;
17265 if (arm_current_cc == code)
17266 arm_condexec_mask |= (mask << arm_condexec_masklen);
17267 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17270 arm_condexec_count++;
17271 arm_condexec_masklen += n;
17273 /* A jump must be the last instruction in a conditional block. */
17274 if (GET_CODE(insn) == JUMP_INSN)
17277 /* Restore recog_data (getting the attributes of other insns can
17278 destroy this array, but final.c assumes that it remains intact
17279 across this call). */
17280 extract_constrain_insn_cached (first_insn);
17284 arm_final_prescan_insn (rtx insn)
17286 /* BODY will hold the body of INSN. */
17287 rtx body = PATTERN (insn);
17289 /* This will be 1 if trying to repeat the trick, and things need to be
17290 reversed if it appears to fail. */
17293 /* If we start with a return insn, we only succeed if we find another one. */
17294 int seeking_return = 0;
17296 /* START_INSN will hold the insn from where we start looking. This is the
17297 first insn after the following code_label if REVERSE is true. */
17298 rtx start_insn = insn;
17300 /* If in state 4, check if the target branch is reached, in order to
17301 change back to state 0. */
17302 if (arm_ccfsm_state == 4)
17304 if (insn == arm_target_insn)
17306 arm_target_insn = NULL;
17307 arm_ccfsm_state = 0;
17312 /* If in state 3, it is possible to repeat the trick, if this insn is an
17313 unconditional branch to a label, and immediately following this branch
17314 is the previous target label which is only used once, and the label this
17315 branch jumps to is not too far off. */
17316 if (arm_ccfsm_state == 3)
17318 if (simplejump_p (insn))
17320 start_insn = next_nonnote_insn (start_insn);
17321 if (GET_CODE (start_insn) == BARRIER)
17323 /* XXX Isn't this always a barrier? */
17324 start_insn = next_nonnote_insn (start_insn);
17326 if (GET_CODE (start_insn) == CODE_LABEL
17327 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17328 && LABEL_NUSES (start_insn) == 1)
17333 else if (GET_CODE (body) == RETURN)
17335 start_insn = next_nonnote_insn (start_insn);
17336 if (GET_CODE (start_insn) == BARRIER)
17337 start_insn = next_nonnote_insn (start_insn);
17338 if (GET_CODE (start_insn) == CODE_LABEL
17339 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17340 && LABEL_NUSES (start_insn) == 1)
17343 seeking_return = 1;
17352 gcc_assert (!arm_ccfsm_state || reverse);
17353 if (GET_CODE (insn) != JUMP_INSN)
17356 /* This jump might be paralleled with a clobber of the condition codes
17357 the jump should always come first */
17358 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17359 body = XVECEXP (body, 0, 0);
17362 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17363 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17366 int fail = FALSE, succeed = FALSE;
17367 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17368 int then_not_else = TRUE;
17369 rtx this_insn = start_insn, label = 0;
17371 /* Register the insn jumped to. */
17374 if (!seeking_return)
17375 label = XEXP (SET_SRC (body), 0);
17377 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17378 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17379 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17381 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17382 then_not_else = FALSE;
17384 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17385 seeking_return = 1;
17386 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17388 seeking_return = 1;
17389 then_not_else = FALSE;
17392 gcc_unreachable ();
17394 /* See how many insns this branch skips, and what kind of insns. If all
17395 insns are okay, and the label or unconditional branch to the same
17396 label is not too far away, succeed. */
17397 for (insns_skipped = 0;
17398 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17402 this_insn = next_nonnote_insn (this_insn);
17406 switch (GET_CODE (this_insn))
17409 /* Succeed if it is the target label, otherwise fail since
17410 control falls in from somewhere else. */
17411 if (this_insn == label)
17413 arm_ccfsm_state = 1;
17421 /* Succeed if the following insn is the target label.
17423 If return insns are used then the last insn in a function
17424 will be a barrier. */
17425 this_insn = next_nonnote_insn (this_insn);
17426 if (this_insn && this_insn == label)
17428 arm_ccfsm_state = 1;
17436 /* The AAPCS says that conditional calls should not be
17437 used since they make interworking inefficient (the
17438 linker can't transform BL<cond> into BLX). That's
17439 only a problem if the machine has BLX. */
17446 /* Succeed if the following insn is the target label, or
17447 if the following two insns are a barrier and the
17449 this_insn = next_nonnote_insn (this_insn);
17450 if (this_insn && GET_CODE (this_insn) == BARRIER)
17451 this_insn = next_nonnote_insn (this_insn);
17453 if (this_insn && this_insn == label
17454 && insns_skipped < max_insns_skipped)
17456 arm_ccfsm_state = 1;
17464 /* If this is an unconditional branch to the same label, succeed.
17465 If it is to another label, do nothing. If it is conditional,
17467 /* XXX Probably, the tests for SET and the PC are
17470 scanbody = PATTERN (this_insn);
17471 if (GET_CODE (scanbody) == SET
17472 && GET_CODE (SET_DEST (scanbody)) == PC)
17474 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17475 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17477 arm_ccfsm_state = 2;
17480 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17483 /* Fail if a conditional return is undesirable (e.g. on a
17484 StrongARM), but still allow this if optimizing for size. */
17485 else if (GET_CODE (scanbody) == RETURN
17486 && !use_return_insn (TRUE, NULL)
17489 else if (GET_CODE (scanbody) == RETURN
17492 arm_ccfsm_state = 2;
17495 else if (GET_CODE (scanbody) == PARALLEL)
17497 switch (get_attr_conds (this_insn))
17507 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17512 /* Instructions using or affecting the condition codes make it
17514 scanbody = PATTERN (this_insn);
17515 if (!(GET_CODE (scanbody) == SET
17516 || GET_CODE (scanbody) == PARALLEL)
17517 || get_attr_conds (this_insn) != CONDS_NOCOND)
17520 /* A conditional cirrus instruction must be followed by
17521 a non Cirrus instruction. However, since we
17522 conditionalize instructions in this function and by
17523 the time we get here we can't add instructions
17524 (nops), because shorten_branches() has already been
17525 called, we will disable conditionalizing Cirrus
17526 instructions to be safe. */
17527 if (GET_CODE (scanbody) != USE
17528 && GET_CODE (scanbody) != CLOBBER
17529 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17539 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17540 arm_target_label = CODE_LABEL_NUMBER (label);
17543 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17545 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17547 this_insn = next_nonnote_insn (this_insn);
17548 gcc_assert (!this_insn
17549 || (GET_CODE (this_insn) != BARRIER
17550 && GET_CODE (this_insn) != CODE_LABEL));
17554 /* Oh, dear! we ran off the end.. give up. */
17555 extract_constrain_insn_cached (insn);
17556 arm_ccfsm_state = 0;
17557 arm_target_insn = NULL;
17560 arm_target_insn = this_insn;
17563 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17566 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17568 if (reverse || then_not_else)
17569 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17572 /* Restore recog_data (getting the attributes of other insns can
17573 destroy this array, but final.c assumes that it remains intact
17574 across this call. */
17575 extract_constrain_insn_cached (insn);
17579 /* Output IT instructions. */
17581 thumb2_asm_output_opcode (FILE * stream)
17586 if (arm_condexec_mask)
17588 for (n = 0; n < arm_condexec_masklen; n++)
17589 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17591 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17592 arm_condition_codes[arm_current_cc]);
17593 arm_condexec_mask = 0;
17597 /* Returns true if REGNO is a valid register
17598 for holding a quantity of type MODE. */
17600 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17602 if (GET_MODE_CLASS (mode) == MODE_CC)
17603 return (regno == CC_REGNUM
17604 || (TARGET_HARD_FLOAT && TARGET_VFP
17605 && regno == VFPCC_REGNUM));
17608 /* For the Thumb we only allow values bigger than SImode in
17609 registers 0 - 6, so that there is always a second low
17610 register available to hold the upper part of the value.
17611 We probably we ought to ensure that the register is the
17612 start of an even numbered register pair. */
17613 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17615 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17616 && IS_CIRRUS_REGNUM (regno))
17617 /* We have outlawed SI values in Cirrus registers because they
17618 reside in the lower 32 bits, but SF values reside in the
17619 upper 32 bits. This causes gcc all sorts of grief. We can't
17620 even split the registers into pairs because Cirrus SI values
17621 get sign extended to 64bits-- aldyh. */
17622 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17624 if (TARGET_HARD_FLOAT && TARGET_VFP
17625 && IS_VFP_REGNUM (regno))
17627 if (mode == SFmode || mode == SImode)
17628 return VFP_REGNO_OK_FOR_SINGLE (regno);
17630 if (mode == DFmode)
17631 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17633 /* VFP registers can hold HFmode values, but there is no point in
17634 putting them there unless we have hardware conversion insns. */
17635 if (mode == HFmode)
17636 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17639 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17640 || (VALID_NEON_QREG_MODE (mode)
17641 && NEON_REGNO_OK_FOR_QUAD (regno))
17642 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17643 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17644 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17645 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17646 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17651 if (TARGET_REALLY_IWMMXT)
17653 if (IS_IWMMXT_GR_REGNUM (regno))
17654 return mode == SImode;
17656 if (IS_IWMMXT_REGNUM (regno))
17657 return VALID_IWMMXT_REG_MODE (mode);
17660 /* We allow almost any value to be stored in the general registers.
17661 Restrict doubleword quantities to even register pairs so that we can
17662 use ldrd. Do not allow very large Neon structure opaque modes in
17663 general registers; they would use too many. */
17664 if (regno <= LAST_ARM_REGNUM)
17665 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17666 && ARM_NUM_REGS (mode) <= 4;
17668 if (regno == FRAME_POINTER_REGNUM
17669 || regno == ARG_POINTER_REGNUM)
17670 /* We only allow integers in the fake hard registers. */
17671 return GET_MODE_CLASS (mode) == MODE_INT;
17673 /* The only registers left are the FPA registers
17674 which we only allow to hold FP values. */
17675 return (TARGET_HARD_FLOAT && TARGET_FPA
17676 && GET_MODE_CLASS (mode) == MODE_FLOAT
17677 && regno >= FIRST_FPA_REGNUM
17678 && regno <= LAST_FPA_REGNUM);
17681 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17682 not used in arm mode. */
17685 arm_regno_class (int regno)
17689 if (regno == STACK_POINTER_REGNUM)
17691 if (regno == CC_REGNUM)
17698 if (TARGET_THUMB2 && regno < 8)
17701 if ( regno <= LAST_ARM_REGNUM
17702 || regno == FRAME_POINTER_REGNUM
17703 || regno == ARG_POINTER_REGNUM)
17704 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17706 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17707 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17709 if (IS_CIRRUS_REGNUM (regno))
17710 return CIRRUS_REGS;
17712 if (IS_VFP_REGNUM (regno))
17714 if (regno <= D7_VFP_REGNUM)
17715 return VFP_D0_D7_REGS;
17716 else if (regno <= LAST_LO_VFP_REGNUM)
17717 return VFP_LO_REGS;
17719 return VFP_HI_REGS;
17722 if (IS_IWMMXT_REGNUM (regno))
17723 return IWMMXT_REGS;
17725 if (IS_IWMMXT_GR_REGNUM (regno))
17726 return IWMMXT_GR_REGS;
17731 /* Handle a special case when computing the offset
17732 of an argument from the frame pointer. */
17734 arm_debugger_arg_offset (int value, rtx addr)
17738 /* We are only interested if dbxout_parms() failed to compute the offset. */
17742 /* We can only cope with the case where the address is held in a register. */
17743 if (GET_CODE (addr) != REG)
17746 /* If we are using the frame pointer to point at the argument, then
17747 an offset of 0 is correct. */
17748 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17751 /* If we are using the stack pointer to point at the
17752 argument, then an offset of 0 is correct. */
17753 /* ??? Check this is consistent with thumb2 frame layout. */
17754 if ((TARGET_THUMB || !frame_pointer_needed)
17755 && REGNO (addr) == SP_REGNUM)
17758 /* Oh dear. The argument is pointed to by a register rather
17759 than being held in a register, or being stored at a known
17760 offset from the frame pointer. Since GDB only understands
17761 those two kinds of argument we must translate the address
17762 held in the register into an offset from the frame pointer.
17763 We do this by searching through the insns for the function
17764 looking to see where this register gets its value. If the
17765 register is initialized from the frame pointer plus an offset
17766 then we are in luck and we can continue, otherwise we give up.
17768 This code is exercised by producing debugging information
17769 for a function with arguments like this:
17771 double func (double a, double b, int c, double d) {return d;}
17773 Without this code the stab for parameter 'd' will be set to
17774 an offset of 0 from the frame pointer, rather than 8. */
17776 /* The if() statement says:
17778 If the insn is a normal instruction
17779 and if the insn is setting the value in a register
17780 and if the register being set is the register holding the address of the argument
17781 and if the address is computing by an addition
17782 that involves adding to a register
17783 which is the frame pointer
17788 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17790 if ( GET_CODE (insn) == INSN
17791 && GET_CODE (PATTERN (insn)) == SET
17792 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17793 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17794 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17795 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17796 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17799 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17808 warning (0, "unable to compute real location of stacked parameter");
17809 value = 8; /* XXX magic hack */
17815 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
17818 if ((MASK) & insn_flags) \
17819 add_builtin_function ((NAME), (TYPE), (CODE), \
17820 BUILT_IN_MD, NULL, NULL_TREE); \
17824 struct builtin_description
17826 const unsigned int mask;
17827 const enum insn_code icode;
17828 const char * const name;
17829 const enum arm_builtins code;
17830 const enum rtx_code comparison;
17831 const unsigned int flag;
17834 static const struct builtin_description bdesc_2arg[] =
17836 #define IWMMXT_BUILTIN(code, string, builtin) \
17837 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17838 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17840 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
17841 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
17842 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
17843 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
17844 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
17845 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
17846 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
17847 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
17848 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
17849 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
17850 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
17851 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
17852 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
17853 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
17854 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
17855 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
17856 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
17857 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
17858 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
17859 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
17860 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
17861 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
17862 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
17863 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
17864 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
17865 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
17866 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
17867 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
17868 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
17869 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
17870 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
17871 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
17872 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
17873 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
17874 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
17875 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
17876 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
17877 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
17878 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
17879 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
17880 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
17881 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
17882 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
17883 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
17884 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
17885 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
17886 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
17887 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
17888 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
17889 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
17890 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
17891 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
17892 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
17893 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
17894 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
17895 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
17896 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
17897 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
17899 #define IWMMXT_BUILTIN2(code, builtin) \
17900 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17902 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
17903 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
17904 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
17905 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
17906 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
17907 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
17908 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
17909 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
17910 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
17911 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
17912 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
17913 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
17914 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
17915 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
17916 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
17917 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
17918 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
17919 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
17920 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
17921 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
17922 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
17923 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
17924 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
17925 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
17926 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
17927 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
17928 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
17929 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
17930 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
17931 IWMMXT_BUILTIN2 (rordi3, WRORDI)
17932 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
17933 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
17936 static const struct builtin_description bdesc_1arg[] =
17938 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
17939 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
17940 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
17941 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
17942 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
17943 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
17944 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
17945 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
17946 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
17947 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
17948 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
17949 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
17950 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
17951 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
17952 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
17953 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
17954 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
17955 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
17958 /* Set up all the iWMMXt builtins. This is
17959 not called if TARGET_IWMMXT is zero. */
17962 arm_init_iwmmxt_builtins (void)
17964 const struct builtin_description * d;
17966 tree endlink = void_list_node;
17968 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17969 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17970 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
17973 = build_function_type (integer_type_node,
17974 tree_cons (NULL_TREE, integer_type_node, endlink));
17975 tree v8qi_ftype_v8qi_v8qi_int
17976 = build_function_type (V8QI_type_node,
17977 tree_cons (NULL_TREE, V8QI_type_node,
17978 tree_cons (NULL_TREE, V8QI_type_node,
17979 tree_cons (NULL_TREE,
17982 tree v4hi_ftype_v4hi_int
17983 = build_function_type (V4HI_type_node,
17984 tree_cons (NULL_TREE, V4HI_type_node,
17985 tree_cons (NULL_TREE, integer_type_node,
17987 tree v2si_ftype_v2si_int
17988 = build_function_type (V2SI_type_node,
17989 tree_cons (NULL_TREE, V2SI_type_node,
17990 tree_cons (NULL_TREE, integer_type_node,
17992 tree v2si_ftype_di_di
17993 = build_function_type (V2SI_type_node,
17994 tree_cons (NULL_TREE, long_long_integer_type_node,
17995 tree_cons (NULL_TREE, long_long_integer_type_node,
17997 tree di_ftype_di_int
17998 = build_function_type (long_long_integer_type_node,
17999 tree_cons (NULL_TREE, long_long_integer_type_node,
18000 tree_cons (NULL_TREE, integer_type_node,
18002 tree di_ftype_di_int_int
18003 = build_function_type (long_long_integer_type_node,
18004 tree_cons (NULL_TREE, long_long_integer_type_node,
18005 tree_cons (NULL_TREE, integer_type_node,
18006 tree_cons (NULL_TREE,
18009 tree int_ftype_v8qi
18010 = build_function_type (integer_type_node,
18011 tree_cons (NULL_TREE, V8QI_type_node,
18013 tree int_ftype_v4hi
18014 = build_function_type (integer_type_node,
18015 tree_cons (NULL_TREE, V4HI_type_node,
18017 tree int_ftype_v2si
18018 = build_function_type (integer_type_node,
18019 tree_cons (NULL_TREE, V2SI_type_node,
18021 tree int_ftype_v8qi_int
18022 = build_function_type (integer_type_node,
18023 tree_cons (NULL_TREE, V8QI_type_node,
18024 tree_cons (NULL_TREE, integer_type_node,
18026 tree int_ftype_v4hi_int
18027 = build_function_type (integer_type_node,
18028 tree_cons (NULL_TREE, V4HI_type_node,
18029 tree_cons (NULL_TREE, integer_type_node,
18031 tree int_ftype_v2si_int
18032 = build_function_type (integer_type_node,
18033 tree_cons (NULL_TREE, V2SI_type_node,
18034 tree_cons (NULL_TREE, integer_type_node,
18036 tree v8qi_ftype_v8qi_int_int
18037 = build_function_type (V8QI_type_node,
18038 tree_cons (NULL_TREE, V8QI_type_node,
18039 tree_cons (NULL_TREE, integer_type_node,
18040 tree_cons (NULL_TREE,
18043 tree v4hi_ftype_v4hi_int_int
18044 = build_function_type (V4HI_type_node,
18045 tree_cons (NULL_TREE, V4HI_type_node,
18046 tree_cons (NULL_TREE, integer_type_node,
18047 tree_cons (NULL_TREE,
18050 tree v2si_ftype_v2si_int_int
18051 = build_function_type (V2SI_type_node,
18052 tree_cons (NULL_TREE, V2SI_type_node,
18053 tree_cons (NULL_TREE, integer_type_node,
18054 tree_cons (NULL_TREE,
18057 /* Miscellaneous. */
18058 tree v8qi_ftype_v4hi_v4hi
18059 = build_function_type (V8QI_type_node,
18060 tree_cons (NULL_TREE, V4HI_type_node,
18061 tree_cons (NULL_TREE, V4HI_type_node,
18063 tree v4hi_ftype_v2si_v2si
18064 = build_function_type (V4HI_type_node,
18065 tree_cons (NULL_TREE, V2SI_type_node,
18066 tree_cons (NULL_TREE, V2SI_type_node,
18068 tree v2si_ftype_v4hi_v4hi
18069 = build_function_type (V2SI_type_node,
18070 tree_cons (NULL_TREE, V4HI_type_node,
18071 tree_cons (NULL_TREE, V4HI_type_node,
18073 tree v2si_ftype_v8qi_v8qi
18074 = build_function_type (V2SI_type_node,
18075 tree_cons (NULL_TREE, V8QI_type_node,
18076 tree_cons (NULL_TREE, V8QI_type_node,
18078 tree v4hi_ftype_v4hi_di
18079 = build_function_type (V4HI_type_node,
18080 tree_cons (NULL_TREE, V4HI_type_node,
18081 tree_cons (NULL_TREE,
18082 long_long_integer_type_node,
18084 tree v2si_ftype_v2si_di
18085 = build_function_type (V2SI_type_node,
18086 tree_cons (NULL_TREE, V2SI_type_node,
18087 tree_cons (NULL_TREE,
18088 long_long_integer_type_node,
18090 tree void_ftype_int_int
18091 = build_function_type (void_type_node,
18092 tree_cons (NULL_TREE, integer_type_node,
18093 tree_cons (NULL_TREE, integer_type_node,
18096 = build_function_type (long_long_unsigned_type_node, endlink);
18098 = build_function_type (long_long_integer_type_node,
18099 tree_cons (NULL_TREE, V8QI_type_node,
18102 = build_function_type (long_long_integer_type_node,
18103 tree_cons (NULL_TREE, V4HI_type_node,
18106 = build_function_type (long_long_integer_type_node,
18107 tree_cons (NULL_TREE, V2SI_type_node,
18109 tree v2si_ftype_v4hi
18110 = build_function_type (V2SI_type_node,
18111 tree_cons (NULL_TREE, V4HI_type_node,
18113 tree v4hi_ftype_v8qi
18114 = build_function_type (V4HI_type_node,
18115 tree_cons (NULL_TREE, V8QI_type_node,
18118 tree di_ftype_di_v4hi_v4hi
18119 = build_function_type (long_long_unsigned_type_node,
18120 tree_cons (NULL_TREE,
18121 long_long_unsigned_type_node,
18122 tree_cons (NULL_TREE, V4HI_type_node,
18123 tree_cons (NULL_TREE,
18127 tree di_ftype_v4hi_v4hi
18128 = build_function_type (long_long_unsigned_type_node,
18129 tree_cons (NULL_TREE, V4HI_type_node,
18130 tree_cons (NULL_TREE, V4HI_type_node,
18133 /* Normal vector binops. */
18134 tree v8qi_ftype_v8qi_v8qi
18135 = build_function_type (V8QI_type_node,
18136 tree_cons (NULL_TREE, V8QI_type_node,
18137 tree_cons (NULL_TREE, V8QI_type_node,
18139 tree v4hi_ftype_v4hi_v4hi
18140 = build_function_type (V4HI_type_node,
18141 tree_cons (NULL_TREE, V4HI_type_node,
18142 tree_cons (NULL_TREE, V4HI_type_node,
18144 tree v2si_ftype_v2si_v2si
18145 = build_function_type (V2SI_type_node,
18146 tree_cons (NULL_TREE, V2SI_type_node,
18147 tree_cons (NULL_TREE, V2SI_type_node,
18149 tree di_ftype_di_di
18150 = build_function_type (long_long_unsigned_type_node,
18151 tree_cons (NULL_TREE, long_long_unsigned_type_node,
18152 tree_cons (NULL_TREE,
18153 long_long_unsigned_type_node,
18156 /* Add all builtins that are more or less simple operations on two
18158 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18160 /* Use one of the operands; the target can have a different mode for
18161 mask-generating compares. */
18162 enum machine_mode mode;
18168 mode = insn_data[d->icode].operand[1].mode;
18173 type = v8qi_ftype_v8qi_v8qi;
18176 type = v4hi_ftype_v4hi_v4hi;
18179 type = v2si_ftype_v2si_v2si;
18182 type = di_ftype_di_di;
18186 gcc_unreachable ();
18189 def_mbuiltin (d->mask, d->name, type, d->code);
18192 /* Add the remaining MMX insns with somewhat more complicated types. */
18193 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
18194 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
18195 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
18197 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
18198 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
18199 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
18200 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
18201 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
18202 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
18204 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
18205 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
18206 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
18207 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
18208 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
18209 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
18211 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
18212 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
18213 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
18214 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
18215 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
18216 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
18218 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
18219 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
18220 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
18221 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
18222 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
18223 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
18225 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
18227 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
18228 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
18229 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
18230 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
18232 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
18233 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
18234 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
18235 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
18236 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
18237 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
18238 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
18239 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
18240 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
18242 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
18243 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
18244 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
18246 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
18247 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
18248 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
18250 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
18251 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
18252 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
18253 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
18254 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
18255 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
18257 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
18258 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
18259 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
18260 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
18261 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
18262 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
18263 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
18264 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
18265 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
18266 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
18267 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
18268 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
18270 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
18271 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
18272 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
18273 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
18275 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
18276 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
18277 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
18278 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
18279 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
18280 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
18281 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
18285 arm_init_tls_builtins (void)
18289 ftype = build_function_type (ptr_type_node, void_list_node);
18290 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
18291 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
18293 TREE_NOTHROW (decl) = 1;
18294 TREE_READONLY (decl) = 1;
18297 enum neon_builtin_type_bits {
18313 #define v8qi_UP T_V8QI
18314 #define v4hi_UP T_V4HI
18315 #define v2si_UP T_V2SI
18316 #define v2sf_UP T_V2SF
18318 #define v16qi_UP T_V16QI
18319 #define v8hi_UP T_V8HI
18320 #define v4si_UP T_V4SI
18321 #define v4sf_UP T_V4SF
18322 #define v2di_UP T_V2DI
18327 #define UP(X) X##_UP
18362 NEON_LOADSTRUCTLANE,
18364 NEON_STORESTRUCTLANE,
18373 const neon_itype itype;
18375 const enum insn_code codes[T_MAX];
18376 const unsigned int num_vars;
18377 unsigned int base_fcode;
18378 } neon_builtin_datum;
18380 #define CF(N,X) CODE_FOR_neon_##N##X
18382 #define VAR1(T, N, A) \
18383 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
18384 #define VAR2(T, N, A, B) \
18385 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
18386 #define VAR3(T, N, A, B, C) \
18387 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
18388 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
18389 #define VAR4(T, N, A, B, C, D) \
18390 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
18391 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
18392 #define VAR5(T, N, A, B, C, D, E) \
18393 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
18394 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
18395 #define VAR6(T, N, A, B, C, D, E, F) \
18396 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
18397 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
18398 #define VAR7(T, N, A, B, C, D, E, F, G) \
18399 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
18400 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18402 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18403 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18405 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18406 CF (N, G), CF (N, H) }, 8, 0
18407 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18408 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18409 | UP (H) | UP (I), \
18410 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18411 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
18412 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18413 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
18414 | UP (H) | UP (I) | UP (J), \
18415 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
18416 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
18418 /* The mode entries in the following table correspond to the "key" type of the
18419 instruction variant, i.e. equivalent to that which would be specified after
18420 the assembler mnemonic, which usually refers to the last vector operand.
18421 (Signed/unsigned/polynomial types are not differentiated between though, and
18422 are all mapped onto the same mode for a given element size.) The modes
18423 listed per instruction should be the same as those defined for that
18424 instruction's pattern in neon.md.
18425 WARNING: Variants should be listed in the same increasing order as
18426 neon_builtin_type_bits. */
18428 static neon_builtin_datum neon_builtin_data[] =
18430 { VAR10 (BINOP, vadd,
18431 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18432 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
18433 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
18434 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18435 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18436 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
18437 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18438 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18439 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
18440 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18441 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
18442 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
18443 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
18444 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
18445 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
18446 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
18447 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
18448 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
18449 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
18450 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
18451 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
18452 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
18453 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18454 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18455 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18456 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
18457 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
18458 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
18459 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18460 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18461 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18462 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
18463 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18464 { VAR10 (BINOP, vsub,
18465 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18466 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
18467 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
18468 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18469 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18470 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
18471 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18472 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18473 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18474 { VAR2 (BINOP, vcage, v2sf, v4sf) },
18475 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
18476 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18477 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18478 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
18479 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18480 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
18481 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18482 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18483 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
18484 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18485 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18486 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
18487 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
18488 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
18489 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
18490 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18491 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
18492 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18493 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18494 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18495 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18496 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18497 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18498 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
18499 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
18500 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
18501 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
18502 /* FIXME: vget_lane supports more variants than this! */
18503 { VAR10 (GETLANE, vget_lane,
18504 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18505 { VAR10 (SETLANE, vset_lane,
18506 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18507 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
18508 { VAR10 (DUP, vdup_n,
18509 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18510 { VAR10 (DUPLANE, vdup_lane,
18511 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18512 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
18513 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
18514 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
18515 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
18516 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
18517 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
18518 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
18519 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18520 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18521 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
18522 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
18523 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18524 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
18525 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
18526 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18527 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18528 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
18529 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
18530 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18531 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
18532 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
18533 { VAR10 (BINOP, vext,
18534 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18535 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18536 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
18537 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
18538 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
18539 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
18540 { VAR10 (SELECT, vbsl,
18541 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18542 { VAR1 (VTBL, vtbl1, v8qi) },
18543 { VAR1 (VTBL, vtbl2, v8qi) },
18544 { VAR1 (VTBL, vtbl3, v8qi) },
18545 { VAR1 (VTBL, vtbl4, v8qi) },
18546 { VAR1 (VTBX, vtbx1, v8qi) },
18547 { VAR1 (VTBX, vtbx2, v8qi) },
18548 { VAR1 (VTBX, vtbx3, v8qi) },
18549 { VAR1 (VTBX, vtbx4, v8qi) },
18550 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18551 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18552 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
18553 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
18554 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
18555 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
18556 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
18557 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
18558 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
18559 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
18560 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
18561 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
18562 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
18563 { VAR10 (LOAD1, vld1,
18564 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18565 { VAR10 (LOAD1LANE, vld1_lane,
18566 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18567 { VAR10 (LOAD1, vld1_dup,
18568 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18569 { VAR10 (STORE1, vst1,
18570 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18571 { VAR10 (STORE1LANE, vst1_lane,
18572 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18573 { VAR9 (LOADSTRUCT,
18574 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18575 { VAR7 (LOADSTRUCTLANE, vld2_lane,
18576 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18577 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
18578 { VAR9 (STORESTRUCT, vst2,
18579 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18580 { VAR7 (STORESTRUCTLANE, vst2_lane,
18581 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18582 { VAR9 (LOADSTRUCT,
18583 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18584 { VAR7 (LOADSTRUCTLANE, vld3_lane,
18585 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18586 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
18587 { VAR9 (STORESTRUCT, vst3,
18588 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18589 { VAR7 (STORESTRUCTLANE, vst3_lane,
18590 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18591 { VAR9 (LOADSTRUCT, vld4,
18592 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18593 { VAR7 (LOADSTRUCTLANE, vld4_lane,
18594 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18595 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
18596 { VAR9 (STORESTRUCT, vst4,
18597 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
18598 { VAR7 (STORESTRUCTLANE, vst4_lane,
18599 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
18600 { VAR10 (LOGICBINOP, vand,
18601 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18602 { VAR10 (LOGICBINOP, vorr,
18603 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18604 { VAR10 (BINOP, veor,
18605 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18606 { VAR10 (LOGICBINOP, vbic,
18607 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
18608 { VAR10 (LOGICBINOP, vorn,
18609 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
18625 arm_init_neon_builtins (void)
18627 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
18629 tree neon_intQI_type_node;
18630 tree neon_intHI_type_node;
18631 tree neon_polyQI_type_node;
18632 tree neon_polyHI_type_node;
18633 tree neon_intSI_type_node;
18634 tree neon_intDI_type_node;
18635 tree neon_float_type_node;
18637 tree intQI_pointer_node;
18638 tree intHI_pointer_node;
18639 tree intSI_pointer_node;
18640 tree intDI_pointer_node;
18641 tree float_pointer_node;
18643 tree const_intQI_node;
18644 tree const_intHI_node;
18645 tree const_intSI_node;
18646 tree const_intDI_node;
18647 tree const_float_node;
18649 tree const_intQI_pointer_node;
18650 tree const_intHI_pointer_node;
18651 tree const_intSI_pointer_node;
18652 tree const_intDI_pointer_node;
18653 tree const_float_pointer_node;
18655 tree V8QI_type_node;
18656 tree V4HI_type_node;
18657 tree V2SI_type_node;
18658 tree V2SF_type_node;
18659 tree V16QI_type_node;
18660 tree V8HI_type_node;
18661 tree V4SI_type_node;
18662 tree V4SF_type_node;
18663 tree V2DI_type_node;
18665 tree intUQI_type_node;
18666 tree intUHI_type_node;
18667 tree intUSI_type_node;
18668 tree intUDI_type_node;
18670 tree intEI_type_node;
18671 tree intOI_type_node;
18672 tree intCI_type_node;
18673 tree intXI_type_node;
18675 tree V8QI_pointer_node;
18676 tree V4HI_pointer_node;
18677 tree V2SI_pointer_node;
18678 tree V2SF_pointer_node;
18679 tree V16QI_pointer_node;
18680 tree V8HI_pointer_node;
18681 tree V4SI_pointer_node;
18682 tree V4SF_pointer_node;
18683 tree V2DI_pointer_node;
18685 tree void_ftype_pv8qi_v8qi_v8qi;
18686 tree void_ftype_pv4hi_v4hi_v4hi;
18687 tree void_ftype_pv2si_v2si_v2si;
18688 tree void_ftype_pv2sf_v2sf_v2sf;
18689 tree void_ftype_pdi_di_di;
18690 tree void_ftype_pv16qi_v16qi_v16qi;
18691 tree void_ftype_pv8hi_v8hi_v8hi;
18692 tree void_ftype_pv4si_v4si_v4si;
18693 tree void_ftype_pv4sf_v4sf_v4sf;
18694 tree void_ftype_pv2di_v2di_v2di;
18696 tree reinterp_ftype_dreg[5][5];
18697 tree reinterp_ftype_qreg[5][5];
18698 tree dreg_types[5], qreg_types[5];
18700 /* Create distinguished type nodes for NEON vector element types,
18701 and pointers to values of such types, so we can detect them later. */
18702 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18703 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18704 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18705 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18706 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18707 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18708 neon_float_type_node = make_node (REAL_TYPE);
18709 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18710 layout_type (neon_float_type_node);
18712 /* Define typedefs which exactly correspond to the modes we are basing vector
18713 types on. If you change these names you'll need to change
18714 the table used by arm_mangle_type too. */
18715 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18716 "__builtin_neon_qi");
18717 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18718 "__builtin_neon_hi");
18719 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18720 "__builtin_neon_si");
18721 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18722 "__builtin_neon_sf");
18723 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18724 "__builtin_neon_di");
18725 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18726 "__builtin_neon_poly8");
18727 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18728 "__builtin_neon_poly16");
18730 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18731 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18732 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18733 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18734 float_pointer_node = build_pointer_type (neon_float_type_node);
18736 /* Next create constant-qualified versions of the above types. */
18737 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18739 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18741 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18743 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18745 const_float_node = build_qualified_type (neon_float_type_node,
18748 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18749 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18750 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18751 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18752 const_float_pointer_node = build_pointer_type (const_float_node);
18754 /* Now create vector types based on our NEON element types. */
18755 /* 64-bit vectors. */
18757 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18759 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18761 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18763 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18764 /* 128-bit vectors. */
18766 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18768 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18770 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18772 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18774 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18776 /* Unsigned integer types for various mode sizes. */
18777 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18778 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18779 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18780 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18782 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18783 "__builtin_neon_uqi");
18784 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18785 "__builtin_neon_uhi");
18786 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18787 "__builtin_neon_usi");
18788 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18789 "__builtin_neon_udi");
18791 /* Opaque integer types for structures of vectors. */
18792 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18793 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18794 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18795 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18797 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18798 "__builtin_neon_ti");
18799 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18800 "__builtin_neon_ei");
18801 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18802 "__builtin_neon_oi");
18803 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18804 "__builtin_neon_ci");
18805 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18806 "__builtin_neon_xi");
18808 /* Pointers to vector types. */
18809 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18810 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18811 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18812 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18813 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18814 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18815 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18816 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18817 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18819 /* Operations which return results as pairs. */
18820 void_ftype_pv8qi_v8qi_v8qi =
18821 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18822 V8QI_type_node, NULL);
18823 void_ftype_pv4hi_v4hi_v4hi =
18824 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18825 V4HI_type_node, NULL);
18826 void_ftype_pv2si_v2si_v2si =
18827 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18828 V2SI_type_node, NULL);
18829 void_ftype_pv2sf_v2sf_v2sf =
18830 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18831 V2SF_type_node, NULL);
18832 void_ftype_pdi_di_di =
18833 build_function_type_list (void_type_node, intDI_pointer_node,
18834 neon_intDI_type_node, neon_intDI_type_node, NULL);
18835 void_ftype_pv16qi_v16qi_v16qi =
18836 build_function_type_list (void_type_node, V16QI_pointer_node,
18837 V16QI_type_node, V16QI_type_node, NULL);
18838 void_ftype_pv8hi_v8hi_v8hi =
18839 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18840 V8HI_type_node, NULL);
18841 void_ftype_pv4si_v4si_v4si =
18842 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18843 V4SI_type_node, NULL);
18844 void_ftype_pv4sf_v4sf_v4sf =
18845 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18846 V4SF_type_node, NULL);
18847 void_ftype_pv2di_v2di_v2di =
18848 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18849 V2DI_type_node, NULL);
18851 dreg_types[0] = V8QI_type_node;
18852 dreg_types[1] = V4HI_type_node;
18853 dreg_types[2] = V2SI_type_node;
18854 dreg_types[3] = V2SF_type_node;
18855 dreg_types[4] = neon_intDI_type_node;
18857 qreg_types[0] = V16QI_type_node;
18858 qreg_types[1] = V8HI_type_node;
18859 qreg_types[2] = V4SI_type_node;
18860 qreg_types[3] = V4SF_type_node;
18861 qreg_types[4] = V2DI_type_node;
18863 for (i = 0; i < 5; i++)
18866 for (j = 0; j < 5; j++)
18868 reinterp_ftype_dreg[i][j]
18869 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18870 reinterp_ftype_qreg[i][j]
18871 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18875 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
18877 neon_builtin_datum *d = &neon_builtin_data[i];
18878 unsigned int j, codeidx = 0;
18880 d->base_fcode = fcode;
18882 for (j = 0; j < T_MAX; j++)
18884 const char* const modenames[] = {
18885 "v8qi", "v4hi", "v2si", "v2sf", "di",
18886 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18890 enum insn_code icode;
18891 int is_load = 0, is_store = 0;
18893 if ((d->bits & (1 << j)) == 0)
18896 icode = d->codes[codeidx++];
18901 case NEON_LOAD1LANE:
18902 case NEON_LOADSTRUCT:
18903 case NEON_LOADSTRUCTLANE:
18905 /* Fall through. */
18907 case NEON_STORE1LANE:
18908 case NEON_STORESTRUCT:
18909 case NEON_STORESTRUCTLANE:
18912 /* Fall through. */
18915 case NEON_LOGICBINOP:
18916 case NEON_SHIFTINSERT:
18923 case NEON_SHIFTIMM:
18924 case NEON_SHIFTACC:
18930 case NEON_LANEMULL:
18931 case NEON_LANEMULH:
18933 case NEON_SCALARMUL:
18934 case NEON_SCALARMULL:
18935 case NEON_SCALARMULH:
18936 case NEON_SCALARMAC:
18942 tree return_type = void_type_node, args = void_list_node;
18944 /* Build a function type directly from the insn_data for this
18945 builtin. The build_function_type() function takes care of
18946 removing duplicates for us. */
18947 for (k = insn_data[icode].n_generator_args - 1; k >= 0; k--)
18951 if (is_load && k == 1)
18953 /* Neon load patterns always have the memory operand
18954 (a SImode pointer) in the operand 1 position. We
18955 want a const pointer to the element type in that
18957 gcc_assert (insn_data[icode].operand[k].mode == SImode);
18963 eltype = const_intQI_pointer_node;
18968 eltype = const_intHI_pointer_node;
18973 eltype = const_intSI_pointer_node;
18978 eltype = const_float_pointer_node;
18983 eltype = const_intDI_pointer_node;
18986 default: gcc_unreachable ();
18989 else if (is_store && k == 0)
18991 /* Similarly, Neon store patterns use operand 0 as
18992 the memory location to store to (a SImode pointer).
18993 Use a pointer to the element type of the store in
18995 gcc_assert (insn_data[icode].operand[k].mode == SImode);
19001 eltype = intQI_pointer_node;
19006 eltype = intHI_pointer_node;
19011 eltype = intSI_pointer_node;
19016 eltype = float_pointer_node;
19021 eltype = intDI_pointer_node;
19024 default: gcc_unreachable ();
19029 switch (insn_data[icode].operand[k].mode)
19031 case VOIDmode: eltype = void_type_node; break;
19033 case QImode: eltype = neon_intQI_type_node; break;
19034 case HImode: eltype = neon_intHI_type_node; break;
19035 case SImode: eltype = neon_intSI_type_node; break;
19036 case SFmode: eltype = neon_float_type_node; break;
19037 case DImode: eltype = neon_intDI_type_node; break;
19038 case TImode: eltype = intTI_type_node; break;
19039 case EImode: eltype = intEI_type_node; break;
19040 case OImode: eltype = intOI_type_node; break;
19041 case CImode: eltype = intCI_type_node; break;
19042 case XImode: eltype = intXI_type_node; break;
19043 /* 64-bit vectors. */
19044 case V8QImode: eltype = V8QI_type_node; break;
19045 case V4HImode: eltype = V4HI_type_node; break;
19046 case V2SImode: eltype = V2SI_type_node; break;
19047 case V2SFmode: eltype = V2SF_type_node; break;
19048 /* 128-bit vectors. */
19049 case V16QImode: eltype = V16QI_type_node; break;
19050 case V8HImode: eltype = V8HI_type_node; break;
19051 case V4SImode: eltype = V4SI_type_node; break;
19052 case V4SFmode: eltype = V4SF_type_node; break;
19053 case V2DImode: eltype = V2DI_type_node; break;
19054 default: gcc_unreachable ();
19058 if (k == 0 && !is_store)
19059 return_type = eltype;
19061 args = tree_cons (NULL_TREE, eltype, args);
19064 ftype = build_function_type (return_type, args);
19068 case NEON_RESULTPAIR:
19070 switch (insn_data[icode].operand[1].mode)
19072 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19073 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19074 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19075 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19076 case DImode: ftype = void_ftype_pdi_di_di; break;
19077 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19078 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19079 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19080 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19081 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19082 default: gcc_unreachable ();
19087 case NEON_REINTERP:
19089 /* We iterate over 5 doubleword types, then 5 quadword
19092 switch (insn_data[icode].operand[0].mode)
19094 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19095 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19096 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19097 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19098 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19099 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19100 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19101 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19102 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19103 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19104 default: gcc_unreachable ();
19110 gcc_unreachable ();
19113 gcc_assert (ftype != NULL);
19115 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
19117 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
19124 arm_init_fp16_builtins (void)
19126 tree fp16_type = make_node (REAL_TYPE);
19127 TYPE_PRECISION (fp16_type) = 16;
19128 layout_type (fp16_type);
19129 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19133 arm_init_builtins (void)
19135 arm_init_tls_builtins ();
19137 if (TARGET_REALLY_IWMMXT)
19138 arm_init_iwmmxt_builtins ();
19141 arm_init_neon_builtins ();
19143 if (arm_fp16_format)
19144 arm_init_fp16_builtins ();
19147 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19149 static const char *
19150 arm_invalid_parameter_type (const_tree t)
19152 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19153 return N_("function parameters cannot have __fp16 type");
19157 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19159 static const char *
19160 arm_invalid_return_type (const_tree t)
19162 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19163 return N_("functions cannot return __fp16 type");
19167 /* Implement TARGET_PROMOTED_TYPE. */
19170 arm_promoted_type (const_tree t)
19172 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19173 return float_type_node;
19177 /* Implement TARGET_CONVERT_TO_TYPE.
19178 Specifically, this hook implements the peculiarity of the ARM
19179 half-precision floating-point C semantics that requires conversions between
19180 __fp16 to or from double to do an intermediate conversion to float. */
19183 arm_convert_to_type (tree type, tree expr)
19185 tree fromtype = TREE_TYPE (expr);
19186 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19188 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19189 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19190 return convert (type, convert (float_type_node, expr));
19194 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19195 This simply adds HFmode as a supported mode; even though we don't
19196 implement arithmetic on this type directly, it's supported by
19197 optabs conversions, much the way the double-word arithmetic is
19198 special-cased in the default hook. */
19201 arm_scalar_mode_supported_p (enum machine_mode mode)
19203 if (mode == HFmode)
19204 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19206 return default_scalar_mode_supported_p (mode);
19209 /* Errors in the source file can cause expand_expr to return const0_rtx
19210 where we expect a vector. To avoid crashing, use one of the vector
19211 clear instructions. */
19214 safe_vector_operand (rtx x, enum machine_mode mode)
19216 if (x != const0_rtx)
19218 x = gen_reg_rtx (mode);
19220 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19221 : gen_rtx_SUBREG (DImode, x, 0)));
19225 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19228 arm_expand_binop_builtin (enum insn_code icode,
19229 tree exp, rtx target)
19232 tree arg0 = CALL_EXPR_ARG (exp, 0);
19233 tree arg1 = CALL_EXPR_ARG (exp, 1);
19234 rtx op0 = expand_normal (arg0);
19235 rtx op1 = expand_normal (arg1);
19236 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19237 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19238 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19240 if (VECTOR_MODE_P (mode0))
19241 op0 = safe_vector_operand (op0, mode0);
19242 if (VECTOR_MODE_P (mode1))
19243 op1 = safe_vector_operand (op1, mode1);
19246 || GET_MODE (target) != tmode
19247 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19248 target = gen_reg_rtx (tmode);
19250 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19252 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19253 op0 = copy_to_mode_reg (mode0, op0);
19254 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19255 op1 = copy_to_mode_reg (mode1, op1);
19257 pat = GEN_FCN (icode) (target, op0, op1);
19264 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19267 arm_expand_unop_builtin (enum insn_code icode,
19268 tree exp, rtx target, int do_load)
19271 tree arg0 = CALL_EXPR_ARG (exp, 0);
19272 rtx op0 = expand_normal (arg0);
19273 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19274 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19277 || GET_MODE (target) != tmode
19278 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19279 target = gen_reg_rtx (tmode);
19281 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19284 if (VECTOR_MODE_P (mode0))
19285 op0 = safe_vector_operand (op0, mode0);
19287 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19288 op0 = copy_to_mode_reg (mode0, op0);
19291 pat = GEN_FCN (icode) (target, op0);
19299 neon_builtin_compare (const void *a, const void *b)
19301 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
19302 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
19303 unsigned int soughtcode = key->base_fcode;
19305 if (soughtcode >= memb->base_fcode
19306 && soughtcode < memb->base_fcode + memb->num_vars)
19308 else if (soughtcode < memb->base_fcode)
19314 static enum insn_code
19315 locate_neon_builtin_icode (int fcode, neon_itype *itype)
19317 neon_builtin_datum key
19318 = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
19319 neon_builtin_datum *found;
19322 key.base_fcode = fcode;
19323 found = (neon_builtin_datum *)
19324 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
19325 sizeof (neon_builtin_data[0]), neon_builtin_compare);
19326 gcc_assert (found);
19327 idx = fcode - (int) found->base_fcode;
19328 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
19331 *itype = found->itype;
19333 return found->codes[idx];
19337 NEON_ARG_COPY_TO_REG,
19342 #define NEON_MAX_BUILTIN_ARGS 5
19344 /* Expand a Neon builtin. */
19346 arm_expand_neon_args (rtx target, int icode, int have_retval,
19351 tree arg[NEON_MAX_BUILTIN_ARGS];
19352 rtx op[NEON_MAX_BUILTIN_ARGS];
19353 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19354 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19359 || GET_MODE (target) != tmode
19360 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19361 target = gen_reg_rtx (tmode);
19363 va_start (ap, exp);
19367 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19369 if (thisarg == NEON_ARG_STOP)
19373 arg[argc] = CALL_EXPR_ARG (exp, argc);
19374 op[argc] = expand_normal (arg[argc]);
19375 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
19379 case NEON_ARG_COPY_TO_REG:
19380 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19381 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19382 (op[argc], mode[argc]))
19383 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19386 case NEON_ARG_CONSTANT:
19387 /* FIXME: This error message is somewhat unhelpful. */
19388 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
19389 (op[argc], mode[argc]))
19390 error ("argument must be a constant");
19393 case NEON_ARG_STOP:
19394 gcc_unreachable ();
19407 pat = GEN_FCN (icode) (target, op[0]);
19411 pat = GEN_FCN (icode) (target, op[0], op[1]);
19415 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19419 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19423 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19427 gcc_unreachable ();
19433 pat = GEN_FCN (icode) (op[0]);
19437 pat = GEN_FCN (icode) (op[0], op[1]);
19441 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19445 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19449 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19453 gcc_unreachable ();
19464 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19465 constants defined per-instruction or per instruction-variant. Instead, the
19466 required info is looked up in the table neon_builtin_data. */
19468 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19471 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
19478 return arm_expand_neon_args (target, icode, 1, exp,
19479 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19483 case NEON_SCALARMUL:
19484 case NEON_SCALARMULL:
19485 case NEON_SCALARMULH:
19486 case NEON_SHIFTINSERT:
19487 case NEON_LOGICBINOP:
19488 return arm_expand_neon_args (target, icode, 1, exp,
19489 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19493 return arm_expand_neon_args (target, icode, 1, exp,
19494 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19495 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19499 case NEON_SHIFTIMM:
19500 return arm_expand_neon_args (target, icode, 1, exp,
19501 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19505 return arm_expand_neon_args (target, icode, 1, exp,
19506 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19510 case NEON_REINTERP:
19511 return arm_expand_neon_args (target, icode, 1, exp,
19512 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19516 return arm_expand_neon_args (target, icode, 1, exp,
19517 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19519 case NEON_RESULTPAIR:
19520 return arm_expand_neon_args (target, icode, 0, exp,
19521 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19525 case NEON_LANEMULL:
19526 case NEON_LANEMULH:
19527 return arm_expand_neon_args (target, icode, 1, exp,
19528 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19529 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19532 return arm_expand_neon_args (target, icode, 1, exp,
19533 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19534 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19536 case NEON_SHIFTACC:
19537 return arm_expand_neon_args (target, icode, 1, exp,
19538 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19539 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19541 case NEON_SCALARMAC:
19542 return arm_expand_neon_args (target, icode, 1, exp,
19543 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19544 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19548 return arm_expand_neon_args (target, icode, 1, exp,
19549 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19553 case NEON_LOADSTRUCT:
19554 return arm_expand_neon_args (target, icode, 1, exp,
19555 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19557 case NEON_LOAD1LANE:
19558 case NEON_LOADSTRUCTLANE:
19559 return arm_expand_neon_args (target, icode, 1, exp,
19560 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19564 case NEON_STORESTRUCT:
19565 return arm_expand_neon_args (target, icode, 0, exp,
19566 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19568 case NEON_STORE1LANE:
19569 case NEON_STORESTRUCTLANE:
19570 return arm_expand_neon_args (target, icode, 0, exp,
19571 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19575 gcc_unreachable ();
19578 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19580 neon_reinterpret (rtx dest, rtx src)
19582 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19585 /* Emit code to place a Neon pair result in memory locations (with equal
19588 neon_emit_pair_result_insn (enum machine_mode mode,
19589 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19592 rtx mem = gen_rtx_MEM (mode, destaddr);
19593 rtx tmp1 = gen_reg_rtx (mode);
19594 rtx tmp2 = gen_reg_rtx (mode);
19596 emit_insn (intfn (tmp1, op1, tmp2, op2));
19598 emit_move_insn (mem, tmp1);
19599 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19600 emit_move_insn (mem, tmp2);
19603 /* Set up operands for a register copy from src to dest, taking care not to
19604 clobber registers in the process.
19605 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19606 be called with a large N, so that should be OK. */
19609 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19611 unsigned int copied = 0, opctr = 0;
19612 unsigned int done = (1 << count) - 1;
19615 while (copied != done)
19617 for (i = 0; i < count; i++)
19621 for (j = 0; good && j < count; j++)
19622 if (i != j && (copied & (1 << j)) == 0
19623 && reg_overlap_mentioned_p (src[j], dest[i]))
19628 operands[opctr++] = dest[i];
19629 operands[opctr++] = src[i];
19635 gcc_assert (opctr == count * 2);
19638 /* Expand an expression EXP that calls a built-in function,
19639 with result going to TARGET if that's convenient
19640 (and in mode MODE if that's convenient).
19641 SUBTARGET may be used as the target for computing one of EXP's operands.
19642 IGNORE is nonzero if the value is to be ignored. */
19645 arm_expand_builtin (tree exp,
19647 rtx subtarget ATTRIBUTE_UNUSED,
19648 enum machine_mode mode ATTRIBUTE_UNUSED,
19649 int ignore ATTRIBUTE_UNUSED)
19651 const struct builtin_description * d;
19652 enum insn_code icode;
19653 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19661 int fcode = DECL_FUNCTION_CODE (fndecl);
19663 enum machine_mode tmode;
19664 enum machine_mode mode0;
19665 enum machine_mode mode1;
19666 enum machine_mode mode2;
19668 if (fcode >= ARM_BUILTIN_NEON_BASE)
19669 return arm_expand_neon_builtin (fcode, exp, target);
19673 case ARM_BUILTIN_TEXTRMSB:
19674 case ARM_BUILTIN_TEXTRMUB:
19675 case ARM_BUILTIN_TEXTRMSH:
19676 case ARM_BUILTIN_TEXTRMUH:
19677 case ARM_BUILTIN_TEXTRMSW:
19678 case ARM_BUILTIN_TEXTRMUW:
19679 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19680 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19681 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19682 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19683 : CODE_FOR_iwmmxt_textrmw);
19685 arg0 = CALL_EXPR_ARG (exp, 0);
19686 arg1 = CALL_EXPR_ARG (exp, 1);
19687 op0 = expand_normal (arg0);
19688 op1 = expand_normal (arg1);
19689 tmode = insn_data[icode].operand[0].mode;
19690 mode0 = insn_data[icode].operand[1].mode;
19691 mode1 = insn_data[icode].operand[2].mode;
19693 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19694 op0 = copy_to_mode_reg (mode0, op0);
19695 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19697 /* @@@ better error message */
19698 error ("selector must be an immediate");
19699 return gen_reg_rtx (tmode);
19702 || GET_MODE (target) != tmode
19703 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19704 target = gen_reg_rtx (tmode);
19705 pat = GEN_FCN (icode) (target, op0, op1);
19711 case ARM_BUILTIN_TINSRB:
19712 case ARM_BUILTIN_TINSRH:
19713 case ARM_BUILTIN_TINSRW:
19714 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19715 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19716 : CODE_FOR_iwmmxt_tinsrw);
19717 arg0 = CALL_EXPR_ARG (exp, 0);
19718 arg1 = CALL_EXPR_ARG (exp, 1);
19719 arg2 = CALL_EXPR_ARG (exp, 2);
19720 op0 = expand_normal (arg0);
19721 op1 = expand_normal (arg1);
19722 op2 = expand_normal (arg2);
19723 tmode = insn_data[icode].operand[0].mode;
19724 mode0 = insn_data[icode].operand[1].mode;
19725 mode1 = insn_data[icode].operand[2].mode;
19726 mode2 = insn_data[icode].operand[3].mode;
19728 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19729 op0 = copy_to_mode_reg (mode0, op0);
19730 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19731 op1 = copy_to_mode_reg (mode1, op1);
19732 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19734 /* @@@ better error message */
19735 error ("selector must be an immediate");
19739 || GET_MODE (target) != tmode
19740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19741 target = gen_reg_rtx (tmode);
19742 pat = GEN_FCN (icode) (target, op0, op1, op2);
19748 case ARM_BUILTIN_SETWCX:
19749 arg0 = CALL_EXPR_ARG (exp, 0);
19750 arg1 = CALL_EXPR_ARG (exp, 1);
19751 op0 = force_reg (SImode, expand_normal (arg0));
19752 op1 = expand_normal (arg1);
19753 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19756 case ARM_BUILTIN_GETWCX:
19757 arg0 = CALL_EXPR_ARG (exp, 0);
19758 op0 = expand_normal (arg0);
19759 target = gen_reg_rtx (SImode);
19760 emit_insn (gen_iwmmxt_tmrc (target, op0));
19763 case ARM_BUILTIN_WSHUFH:
19764 icode = CODE_FOR_iwmmxt_wshufh;
19765 arg0 = CALL_EXPR_ARG (exp, 0);
19766 arg1 = CALL_EXPR_ARG (exp, 1);
19767 op0 = expand_normal (arg0);
19768 op1 = expand_normal (arg1);
19769 tmode = insn_data[icode].operand[0].mode;
19770 mode1 = insn_data[icode].operand[1].mode;
19771 mode2 = insn_data[icode].operand[2].mode;
19773 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19774 op0 = copy_to_mode_reg (mode1, op0);
19775 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19777 /* @@@ better error message */
19778 error ("mask must be an immediate");
19782 || GET_MODE (target) != tmode
19783 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19784 target = gen_reg_rtx (tmode);
19785 pat = GEN_FCN (icode) (target, op0, op1);
19791 case ARM_BUILTIN_WSADB:
19792 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19793 case ARM_BUILTIN_WSADH:
19794 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19795 case ARM_BUILTIN_WSADBZ:
19796 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19797 case ARM_BUILTIN_WSADHZ:
19798 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19800 /* Several three-argument builtins. */
19801 case ARM_BUILTIN_WMACS:
19802 case ARM_BUILTIN_WMACU:
19803 case ARM_BUILTIN_WALIGN:
19804 case ARM_BUILTIN_TMIA:
19805 case ARM_BUILTIN_TMIAPH:
19806 case ARM_BUILTIN_TMIATT:
19807 case ARM_BUILTIN_TMIATB:
19808 case ARM_BUILTIN_TMIABT:
19809 case ARM_BUILTIN_TMIABB:
19810 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19811 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19812 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19813 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19814 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19815 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19816 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19817 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19818 : CODE_FOR_iwmmxt_walign);
19819 arg0 = CALL_EXPR_ARG (exp, 0);
19820 arg1 = CALL_EXPR_ARG (exp, 1);
19821 arg2 = CALL_EXPR_ARG (exp, 2);
19822 op0 = expand_normal (arg0);
19823 op1 = expand_normal (arg1);
19824 op2 = expand_normal (arg2);
19825 tmode = insn_data[icode].operand[0].mode;
19826 mode0 = insn_data[icode].operand[1].mode;
19827 mode1 = insn_data[icode].operand[2].mode;
19828 mode2 = insn_data[icode].operand[3].mode;
19830 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19831 op0 = copy_to_mode_reg (mode0, op0);
19832 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19833 op1 = copy_to_mode_reg (mode1, op1);
19834 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19835 op2 = copy_to_mode_reg (mode2, op2);
19837 || GET_MODE (target) != tmode
19838 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19839 target = gen_reg_rtx (tmode);
19840 pat = GEN_FCN (icode) (target, op0, op1, op2);
19846 case ARM_BUILTIN_WZERO:
19847 target = gen_reg_rtx (DImode);
19848 emit_insn (gen_iwmmxt_clrdi (target));
19851 case ARM_BUILTIN_THREAD_POINTER:
19852 return arm_load_tp (target);
19858 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19859 if (d->code == (const enum arm_builtins) fcode)
19860 return arm_expand_binop_builtin (d->icode, exp, target);
19862 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19863 if (d->code == (const enum arm_builtins) fcode)
19864 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19866 /* @@@ Should really do something sensible here. */
19870 /* Return the number (counting from 0) of
19871 the least significant set bit in MASK. */
19874 number_of_first_bit_set (unsigned mask)
19879 (mask & (1 << bit)) == 0;
19886 /* Emit code to push or pop registers to or from the stack. F is the
19887 assembly file. MASK is the registers to push or pop. PUSH is
19888 nonzero if we should push, and zero if we should pop. For debugging
19889 output, if pushing, adjust CFA_OFFSET by the amount of space added
19890 to the stack. REAL_REGS should have the same number of bits set as
19891 MASK, and will be used instead (in the same order) to describe which
19892 registers were saved - this is used to mark the save slots when we
19893 push high registers after moving them to low registers. */
19895 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19896 unsigned long real_regs)
19899 int lo_mask = mask & 0xFF;
19900 int pushed_words = 0;
19904 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19906 /* Special case. Do not generate a POP PC statement here, do it in
19908 thumb_exit (f, -1);
19912 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19914 fprintf (f, "\t.save\t{");
19915 for (regno = 0; regno < 15; regno++)
19917 if (real_regs & (1 << regno))
19919 if (real_regs & ((1 << regno) -1))
19921 asm_fprintf (f, "%r", regno);
19924 fprintf (f, "}\n");
19927 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19929 /* Look at the low registers first. */
19930 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19934 asm_fprintf (f, "%r", regno);
19936 if ((lo_mask & ~1) != 0)
19943 if (push && (mask & (1 << LR_REGNUM)))
19945 /* Catch pushing the LR. */
19949 asm_fprintf (f, "%r", LR_REGNUM);
19953 else if (!push && (mask & (1 << PC_REGNUM)))
19955 /* Catch popping the PC. */
19956 if (TARGET_INTERWORK || TARGET_BACKTRACE
19957 || crtl->calls_eh_return)
19959 /* The PC is never poped directly, instead
19960 it is popped into r3 and then BX is used. */
19961 fprintf (f, "}\n");
19963 thumb_exit (f, -1);
19972 asm_fprintf (f, "%r", PC_REGNUM);
19976 fprintf (f, "}\n");
19978 if (push && pushed_words && dwarf2out_do_frame ())
19980 char *l = dwarf2out_cfi_label (false);
19981 int pushed_mask = real_regs;
19983 *cfa_offset += pushed_words * 4;
19984 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19987 pushed_mask = real_regs;
19988 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19990 if (pushed_mask & 1)
19991 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
19996 /* Generate code to return from a thumb function.
19997 If 'reg_containing_return_addr' is -1, then the return address is
19998 actually on the stack, at the stack pointer. */
20000 thumb_exit (FILE *f, int reg_containing_return_addr)
20002 unsigned regs_available_for_popping;
20003 unsigned regs_to_pop;
20005 unsigned available;
20009 int restore_a4 = FALSE;
20011 /* Compute the registers we need to pop. */
20015 if (reg_containing_return_addr == -1)
20017 regs_to_pop |= 1 << LR_REGNUM;
20021 if (TARGET_BACKTRACE)
20023 /* Restore the (ARM) frame pointer and stack pointer. */
20024 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20028 /* If there is nothing to pop then just emit the BX instruction and
20030 if (pops_needed == 0)
20032 if (crtl->calls_eh_return)
20033 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20035 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20038 /* Otherwise if we are not supporting interworking and we have not created
20039 a backtrace structure and the function was not entered in ARM mode then
20040 just pop the return address straight into the PC. */
20041 else if (!TARGET_INTERWORK
20042 && !TARGET_BACKTRACE
20043 && !is_called_in_ARM_mode (current_function_decl)
20044 && !crtl->calls_eh_return)
20046 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20050 /* Find out how many of the (return) argument registers we can corrupt. */
20051 regs_available_for_popping = 0;
20053 /* If returning via __builtin_eh_return, the bottom three registers
20054 all contain information needed for the return. */
20055 if (crtl->calls_eh_return)
20059 /* If we can deduce the registers used from the function's
20060 return value. This is more reliable that examining
20061 df_regs_ever_live_p () because that will be set if the register is
20062 ever used in the function, not just if the register is used
20063 to hold a return value. */
20065 if (crtl->return_rtx != 0)
20066 mode = GET_MODE (crtl->return_rtx);
20068 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20070 size = GET_MODE_SIZE (mode);
20074 /* In a void function we can use any argument register.
20075 In a function that returns a structure on the stack
20076 we can use the second and third argument registers. */
20077 if (mode == VOIDmode)
20078 regs_available_for_popping =
20079 (1 << ARG_REGISTER (1))
20080 | (1 << ARG_REGISTER (2))
20081 | (1 << ARG_REGISTER (3));
20083 regs_available_for_popping =
20084 (1 << ARG_REGISTER (2))
20085 | (1 << ARG_REGISTER (3));
20087 else if (size <= 4)
20088 regs_available_for_popping =
20089 (1 << ARG_REGISTER (2))
20090 | (1 << ARG_REGISTER (3));
20091 else if (size <= 8)
20092 regs_available_for_popping =
20093 (1 << ARG_REGISTER (3));
20096 /* Match registers to be popped with registers into which we pop them. */
20097 for (available = regs_available_for_popping,
20098 required = regs_to_pop;
20099 required != 0 && available != 0;
20100 available &= ~(available & - available),
20101 required &= ~(required & - required))
20104 /* If we have any popping registers left over, remove them. */
20106 regs_available_for_popping &= ~available;
20108 /* Otherwise if we need another popping register we can use
20109 the fourth argument register. */
20110 else if (pops_needed)
20112 /* If we have not found any free argument registers and
20113 reg a4 contains the return address, we must move it. */
20114 if (regs_available_for_popping == 0
20115 && reg_containing_return_addr == LAST_ARG_REGNUM)
20117 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20118 reg_containing_return_addr = LR_REGNUM;
20120 else if (size > 12)
20122 /* Register a4 is being used to hold part of the return value,
20123 but we have dire need of a free, low register. */
20126 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
20129 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20131 /* The fourth argument register is available. */
20132 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20138 /* Pop as many registers as we can. */
20139 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20140 regs_available_for_popping);
20142 /* Process the registers we popped. */
20143 if (reg_containing_return_addr == -1)
20145 /* The return address was popped into the lowest numbered register. */
20146 regs_to_pop &= ~(1 << LR_REGNUM);
20148 reg_containing_return_addr =
20149 number_of_first_bit_set (regs_available_for_popping);
20151 /* Remove this register for the mask of available registers, so that
20152 the return address will not be corrupted by further pops. */
20153 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20156 /* If we popped other registers then handle them here. */
20157 if (regs_available_for_popping)
20161 /* Work out which register currently contains the frame pointer. */
20162 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20164 /* Move it into the correct place. */
20165 asm_fprintf (f, "\tmov\t%r, %r\n",
20166 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20168 /* (Temporarily) remove it from the mask of popped registers. */
20169 regs_available_for_popping &= ~(1 << frame_pointer);
20170 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20172 if (regs_available_for_popping)
20176 /* We popped the stack pointer as well,
20177 find the register that contains it. */
20178 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20180 /* Move it into the stack register. */
20181 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20183 /* At this point we have popped all necessary registers, so
20184 do not worry about restoring regs_available_for_popping
20185 to its correct value:
20187 assert (pops_needed == 0)
20188 assert (regs_available_for_popping == (1 << frame_pointer))
20189 assert (regs_to_pop == (1 << STACK_POINTER)) */
20193 /* Since we have just move the popped value into the frame
20194 pointer, the popping register is available for reuse, and
20195 we know that we still have the stack pointer left to pop. */
20196 regs_available_for_popping |= (1 << frame_pointer);
20200 /* If we still have registers left on the stack, but we no longer have
20201 any registers into which we can pop them, then we must move the return
20202 address into the link register and make available the register that
20204 if (regs_available_for_popping == 0 && pops_needed > 0)
20206 regs_available_for_popping |= 1 << reg_containing_return_addr;
20208 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20209 reg_containing_return_addr);
20211 reg_containing_return_addr = LR_REGNUM;
20214 /* If we have registers left on the stack then pop some more.
20215 We know that at most we will want to pop FP and SP. */
20216 if (pops_needed > 0)
20221 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20222 regs_available_for_popping);
20224 /* We have popped either FP or SP.
20225 Move whichever one it is into the correct register. */
20226 popped_into = number_of_first_bit_set (regs_available_for_popping);
20227 move_to = number_of_first_bit_set (regs_to_pop);
20229 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20231 regs_to_pop &= ~(1 << move_to);
20236 /* If we still have not popped everything then we must have only
20237 had one register available to us and we are now popping the SP. */
20238 if (pops_needed > 0)
20242 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20243 regs_available_for_popping);
20245 popped_into = number_of_first_bit_set (regs_available_for_popping);
20247 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20249 assert (regs_to_pop == (1 << STACK_POINTER))
20250 assert (pops_needed == 1)
20254 /* If necessary restore the a4 register. */
20257 if (reg_containing_return_addr != LR_REGNUM)
20259 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20260 reg_containing_return_addr = LR_REGNUM;
20263 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20266 if (crtl->calls_eh_return)
20267 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20269 /* Return to caller. */
20270 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20273 /* Scan INSN just before assembler is output for it.
20274 For Thumb-1, we track the status of the condition codes; this
20275 information is used in the cbranchsi4_insn pattern. */
20277 thumb1_final_prescan_insn (rtx insn)
20279 if (flag_print_asm_name)
20280 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20281 INSN_ADDRESSES (INSN_UID (insn)));
20282 /* Don't overwrite the previous setter when we get to a cbranch. */
20283 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20285 enum attr_conds conds;
20287 if (cfun->machine->thumb1_cc_insn)
20289 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20290 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20293 conds = get_attr_conds (insn);
20294 if (conds == CONDS_SET)
20296 rtx set = single_set (insn);
20297 cfun->machine->thumb1_cc_insn = insn;
20298 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20299 cfun->machine->thumb1_cc_op1 = const0_rtx;
20300 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20301 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20303 rtx src1 = XEXP (SET_SRC (set), 1);
20304 if (src1 == const0_rtx)
20305 cfun->machine->thumb1_cc_mode = CCmode;
20308 else if (conds != CONDS_NOCOND)
20309 cfun->machine->thumb1_cc_insn = NULL_RTX;
20314 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20316 unsigned HOST_WIDE_INT mask = 0xff;
20319 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20320 if (val == 0) /* XXX */
20323 for (i = 0; i < 25; i++)
20324 if ((val & (mask << i)) == val)
20330 /* Returns nonzero if the current function contains,
20331 or might contain a far jump. */
20333 thumb_far_jump_used_p (void)
20337 /* This test is only important for leaf functions. */
20338 /* assert (!leaf_function_p ()); */
20340 /* If we have already decided that far jumps may be used,
20341 do not bother checking again, and always return true even if
20342 it turns out that they are not being used. Once we have made
20343 the decision that far jumps are present (and that hence the link
20344 register will be pushed onto the stack) we cannot go back on it. */
20345 if (cfun->machine->far_jump_used)
20348 /* If this function is not being called from the prologue/epilogue
20349 generation code then it must be being called from the
20350 INITIAL_ELIMINATION_OFFSET macro. */
20351 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20353 /* In this case we know that we are being asked about the elimination
20354 of the arg pointer register. If that register is not being used,
20355 then there are no arguments on the stack, and we do not have to
20356 worry that a far jump might force the prologue to push the link
20357 register, changing the stack offsets. In this case we can just
20358 return false, since the presence of far jumps in the function will
20359 not affect stack offsets.
20361 If the arg pointer is live (or if it was live, but has now been
20362 eliminated and so set to dead) then we do have to test to see if
20363 the function might contain a far jump. This test can lead to some
20364 false negatives, since before reload is completed, then length of
20365 branch instructions is not known, so gcc defaults to returning their
20366 longest length, which in turn sets the far jump attribute to true.
20368 A false negative will not result in bad code being generated, but it
20369 will result in a needless push and pop of the link register. We
20370 hope that this does not occur too often.
20372 If we need doubleword stack alignment this could affect the other
20373 elimination offsets so we can't risk getting it wrong. */
20374 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20375 cfun->machine->arg_pointer_live = 1;
20376 else if (!cfun->machine->arg_pointer_live)
20380 /* Check to see if the function contains a branch
20381 insn with the far jump attribute set. */
20382 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20384 if (GET_CODE (insn) == JUMP_INSN
20385 /* Ignore tablejump patterns. */
20386 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20387 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20388 && get_attr_far_jump (insn) == FAR_JUMP_YES
20391 /* Record the fact that we have decided that
20392 the function does use far jumps. */
20393 cfun->machine->far_jump_used = 1;
20401 /* Return nonzero if FUNC must be entered in ARM mode. */
20403 is_called_in_ARM_mode (tree func)
20405 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20407 /* Ignore the problem about functions whose address is taken. */
20408 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20412 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20418 /* Given the stack offsets and register mask in OFFSETS, decide how
20419 many additional registers to push instead of subtracting a constant
20420 from SP. For epilogues the principle is the same except we use pop.
20421 FOR_PROLOGUE indicates which we're generating. */
20423 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20425 HOST_WIDE_INT amount;
20426 unsigned long live_regs_mask = offsets->saved_regs_mask;
20427 /* Extract a mask of the ones we can give to the Thumb's push/pop
20429 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20430 /* Then count how many other high registers will need to be pushed. */
20431 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20432 int n_free, reg_base;
20434 if (!for_prologue && frame_pointer_needed)
20435 amount = offsets->locals_base - offsets->saved_regs;
20437 amount = offsets->outgoing_args - offsets->saved_regs;
20439 /* If the stack frame size is 512 exactly, we can save one load
20440 instruction, which should make this a win even when optimizing
20442 if (!optimize_size && amount != 512)
20445 /* Can't do this if there are high registers to push. */
20446 if (high_regs_pushed != 0)
20449 /* Shouldn't do it in the prologue if no registers would normally
20450 be pushed at all. In the epilogue, also allow it if we'll have
20451 a pop insn for the PC. */
20454 || TARGET_BACKTRACE
20455 || (live_regs_mask & 1 << LR_REGNUM) == 0
20456 || TARGET_INTERWORK
20457 || crtl->args.pretend_args_size != 0))
20460 /* Don't do this if thumb_expand_prologue wants to emit instructions
20461 between the push and the stack frame allocation. */
20463 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20464 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20471 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20472 live_regs_mask >>= reg_base;
20475 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20476 && (for_prologue || call_used_regs[reg_base + n_free]))
20478 live_regs_mask >>= 1;
20484 gcc_assert (amount / 4 * 4 == amount);
20486 if (amount >= 512 && (amount - n_free * 4) < 512)
20487 return (amount - 508) / 4;
20488 if (amount <= n_free * 4)
20493 /* The bits which aren't usefully expanded as rtl. */
20495 thumb_unexpanded_epilogue (void)
20497 arm_stack_offsets *offsets;
20499 unsigned long live_regs_mask = 0;
20500 int high_regs_pushed = 0;
20502 int had_to_push_lr;
20505 if (cfun->machine->return_used_this_function != 0)
20508 if (IS_NAKED (arm_current_func_type ()))
20511 offsets = arm_get_frame_offsets ();
20512 live_regs_mask = offsets->saved_regs_mask;
20513 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20515 /* If we can deduce the registers used from the function's return value.
20516 This is more reliable that examining df_regs_ever_live_p () because that
20517 will be set if the register is ever used in the function, not just if
20518 the register is used to hold a return value. */
20519 size = arm_size_return_regs ();
20521 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20524 unsigned long extra_mask = (1 << extra_pop) - 1;
20525 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20528 /* The prolog may have pushed some high registers to use as
20529 work registers. e.g. the testsuite file:
20530 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20531 compiles to produce:
20532 push {r4, r5, r6, r7, lr}
20536 as part of the prolog. We have to undo that pushing here. */
20538 if (high_regs_pushed)
20540 unsigned long mask = live_regs_mask & 0xff;
20543 /* The available low registers depend on the size of the value we are
20551 /* Oh dear! We have no low registers into which we can pop
20554 ("no low registers available for popping high registers");
20556 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20557 if (live_regs_mask & (1 << next_hi_reg))
20560 while (high_regs_pushed)
20562 /* Find lo register(s) into which the high register(s) can
20564 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20566 if (mask & (1 << regno))
20567 high_regs_pushed--;
20568 if (high_regs_pushed == 0)
20572 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20574 /* Pop the values into the low register(s). */
20575 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20577 /* Move the value(s) into the high registers. */
20578 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20580 if (mask & (1 << regno))
20582 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20585 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20586 if (live_regs_mask & (1 << next_hi_reg))
20591 live_regs_mask &= ~0x0f00;
20594 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20595 live_regs_mask &= 0xff;
20597 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20599 /* Pop the return address into the PC. */
20600 if (had_to_push_lr)
20601 live_regs_mask |= 1 << PC_REGNUM;
20603 /* Either no argument registers were pushed or a backtrace
20604 structure was created which includes an adjusted stack
20605 pointer, so just pop everything. */
20606 if (live_regs_mask)
20607 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20610 /* We have either just popped the return address into the
20611 PC or it is was kept in LR for the entire function.
20612 Note that thumb_pushpop has already called thumb_exit if the
20613 PC was in the list. */
20614 if (!had_to_push_lr)
20615 thumb_exit (asm_out_file, LR_REGNUM);
20619 /* Pop everything but the return address. */
20620 if (live_regs_mask)
20621 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20624 if (had_to_push_lr)
20628 /* We have no free low regs, so save one. */
20629 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20633 /* Get the return address into a temporary register. */
20634 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20635 1 << LAST_ARG_REGNUM);
20639 /* Move the return address to lr. */
20640 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20642 /* Restore the low register. */
20643 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20648 regno = LAST_ARG_REGNUM;
20653 /* Remove the argument registers that were pushed onto the stack. */
20654 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20655 SP_REGNUM, SP_REGNUM,
20656 crtl->args.pretend_args_size);
20658 thumb_exit (asm_out_file, regno);
20664 /* Functions to save and restore machine-specific function data. */
20665 static struct machine_function *
20666 arm_init_machine_status (void)
20668 struct machine_function *machine;
20669 machine = ggc_alloc_cleared_machine_function ();
20671 #if ARM_FT_UNKNOWN != 0
20672 machine->func_type = ARM_FT_UNKNOWN;
20677 /* Return an RTX indicating where the return address to the
20678 calling function can be found. */
20680 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20685 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20688 /* Do anything needed before RTL is emitted for each function. */
20690 arm_init_expanders (void)
20692 /* Arrange to initialize and mark the machine per-function status. */
20693 init_machine_status = arm_init_machine_status;
20695 /* This is to stop the combine pass optimizing away the alignment
20696 adjustment of va_arg. */
20697 /* ??? It is claimed that this should not be necessary. */
20699 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20703 /* Like arm_compute_initial_elimination offset. Simpler because there
20704 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20705 to point at the base of the local variables after static stack
20706 space for a function has been allocated. */
20709 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20711 arm_stack_offsets *offsets;
20713 offsets = arm_get_frame_offsets ();
20717 case ARG_POINTER_REGNUM:
20720 case STACK_POINTER_REGNUM:
20721 return offsets->outgoing_args - offsets->saved_args;
20723 case FRAME_POINTER_REGNUM:
20724 return offsets->soft_frame - offsets->saved_args;
20726 case ARM_HARD_FRAME_POINTER_REGNUM:
20727 return offsets->saved_regs - offsets->saved_args;
20729 case THUMB_HARD_FRAME_POINTER_REGNUM:
20730 return offsets->locals_base - offsets->saved_args;
20733 gcc_unreachable ();
20737 case FRAME_POINTER_REGNUM:
20740 case STACK_POINTER_REGNUM:
20741 return offsets->outgoing_args - offsets->soft_frame;
20743 case ARM_HARD_FRAME_POINTER_REGNUM:
20744 return offsets->saved_regs - offsets->soft_frame;
20746 case THUMB_HARD_FRAME_POINTER_REGNUM:
20747 return offsets->locals_base - offsets->soft_frame;
20750 gcc_unreachable ();
20755 gcc_unreachable ();
20759 /* Generate the rest of a function's prologue. */
20761 thumb1_expand_prologue (void)
20765 HOST_WIDE_INT amount;
20766 arm_stack_offsets *offsets;
20767 unsigned long func_type;
20769 unsigned long live_regs_mask;
20771 func_type = arm_current_func_type ();
20773 /* Naked functions don't have prologues. */
20774 if (IS_NAKED (func_type))
20777 if (IS_INTERRUPT (func_type))
20779 error ("interrupt Service Routines cannot be coded in Thumb mode");
20783 offsets = arm_get_frame_offsets ();
20784 live_regs_mask = offsets->saved_regs_mask;
20785 /* Load the pic register before setting the frame pointer,
20786 so we can use r7 as a temporary work register. */
20787 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20788 arm_load_pic_register (live_regs_mask);
20790 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20791 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20792 stack_pointer_rtx);
20794 if (flag_stack_usage)
20795 current_function_static_stack_size
20796 = offsets->outgoing_args - offsets->saved_args;
20798 amount = offsets->outgoing_args - offsets->saved_regs;
20799 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20804 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20805 GEN_INT (- amount)));
20806 RTX_FRAME_RELATED_P (insn) = 1;
20812 /* The stack decrement is too big for an immediate value in a single
20813 insn. In theory we could issue multiple subtracts, but after
20814 three of them it becomes more space efficient to place the full
20815 value in the constant pool and load into a register. (Also the
20816 ARM debugger really likes to see only one stack decrement per
20817 function). So instead we look for a scratch register into which
20818 we can load the decrement, and then we subtract this from the
20819 stack pointer. Unfortunately on the thumb the only available
20820 scratch registers are the argument registers, and we cannot use
20821 these as they may hold arguments to the function. Instead we
20822 attempt to locate a call preserved register which is used by this
20823 function. If we can find one, then we know that it will have
20824 been pushed at the start of the prologue and so we can corrupt
20826 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20827 if (live_regs_mask & (1 << regno))
20830 gcc_assert(regno <= LAST_LO_REGNUM);
20832 reg = gen_rtx_REG (SImode, regno);
20834 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20836 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20837 stack_pointer_rtx, reg));
20838 RTX_FRAME_RELATED_P (insn) = 1;
20839 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20840 plus_constant (stack_pointer_rtx,
20842 RTX_FRAME_RELATED_P (dwarf) = 1;
20843 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20847 if (frame_pointer_needed)
20848 thumb_set_frame_pointer (offsets);
20850 /* If we are profiling, make sure no instructions are scheduled before
20851 the call to mcount. Similarly if the user has requested no
20852 scheduling in the prolog. Similarly if we want non-call exceptions
20853 using the EABI unwinder, to prevent faulting instructions from being
20854 swapped with a stack adjustment. */
20855 if (crtl->profile || !TARGET_SCHED_PROLOG
20856 || (arm_except_unwind_info (&global_options) == UI_TARGET
20857 && cfun->can_throw_non_call_exceptions))
20858 emit_insn (gen_blockage ());
20860 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20861 if (live_regs_mask & 0xff)
20862 cfun->machine->lr_save_eliminated = 0;
20867 thumb1_expand_epilogue (void)
20869 HOST_WIDE_INT amount;
20870 arm_stack_offsets *offsets;
20873 /* Naked functions don't have prologues. */
20874 if (IS_NAKED (arm_current_func_type ()))
20877 offsets = arm_get_frame_offsets ();
20878 amount = offsets->outgoing_args - offsets->saved_regs;
20880 if (frame_pointer_needed)
20882 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20883 amount = offsets->locals_base - offsets->saved_regs;
20885 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20887 gcc_assert (amount >= 0);
20891 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20892 GEN_INT (amount)));
20895 /* r3 is always free in the epilogue. */
20896 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20898 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20899 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20903 /* Emit a USE (stack_pointer_rtx), so that
20904 the stack adjustment will not be deleted. */
20905 emit_insn (gen_prologue_use (stack_pointer_rtx));
20907 if (crtl->profile || !TARGET_SCHED_PROLOG)
20908 emit_insn (gen_blockage ());
20910 /* Emit a clobber for each insn that will be restored in the epilogue,
20911 so that flow2 will get register lifetimes correct. */
20912 for (regno = 0; regno < 13; regno++)
20913 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20914 emit_clobber (gen_rtx_REG (SImode, regno));
20916 if (! df_regs_ever_live_p (LR_REGNUM))
20917 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20921 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20923 arm_stack_offsets *offsets;
20924 unsigned long live_regs_mask = 0;
20925 unsigned long l_mask;
20926 unsigned high_regs_pushed = 0;
20927 int cfa_offset = 0;
20930 if (IS_NAKED (arm_current_func_type ()))
20933 if (is_called_in_ARM_mode (current_function_decl))
20937 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20938 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20940 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20942 /* Generate code sequence to switch us into Thumb mode. */
20943 /* The .code 32 directive has already been emitted by
20944 ASM_DECLARE_FUNCTION_NAME. */
20945 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20946 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20948 /* Generate a label, so that the debugger will notice the
20949 change in instruction sets. This label is also used by
20950 the assembler to bypass the ARM code when this function
20951 is called from a Thumb encoded function elsewhere in the
20952 same file. Hence the definition of STUB_NAME here must
20953 agree with the definition in gas/config/tc-arm.c. */
20955 #define STUB_NAME ".real_start_of"
20957 fprintf (f, "\t.code\t16\n");
20959 if (arm_dllexport_name_p (name))
20960 name = arm_strip_name_encoding (name);
20962 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20963 fprintf (f, "\t.thumb_func\n");
20964 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20967 if (crtl->args.pretend_args_size)
20969 /* Output unwind directive for the stack adjustment. */
20970 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20971 fprintf (f, "\t.pad #%d\n",
20972 crtl->args.pretend_args_size);
20974 if (cfun->machine->uses_anonymous_args)
20978 fprintf (f, "\tpush\t{");
20980 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20982 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20983 regno <= LAST_ARG_REGNUM;
20985 asm_fprintf (f, "%r%s", regno,
20986 regno == LAST_ARG_REGNUM ? "" : ", ");
20988 fprintf (f, "}\n");
20991 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
20992 SP_REGNUM, SP_REGNUM,
20993 crtl->args.pretend_args_size);
20995 /* We don't need to record the stores for unwinding (would it
20996 help the debugger any if we did?), but record the change in
20997 the stack pointer. */
20998 if (dwarf2out_do_frame ())
21000 char *l = dwarf2out_cfi_label (false);
21002 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
21003 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21007 /* Get the registers we are going to push. */
21008 offsets = arm_get_frame_offsets ();
21009 live_regs_mask = offsets->saved_regs_mask;
21010 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21011 l_mask = live_regs_mask & 0x40ff;
21012 /* Then count how many other high registers will need to be pushed. */
21013 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21015 if (TARGET_BACKTRACE)
21018 unsigned work_register;
21020 /* We have been asked to create a stack backtrace structure.
21021 The code looks like this:
21025 0 sub SP, #16 Reserve space for 4 registers.
21026 2 push {R7} Push low registers.
21027 4 add R7, SP, #20 Get the stack pointer before the push.
21028 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21029 8 mov R7, PC Get hold of the start of this code plus 12.
21030 10 str R7, [SP, #16] Store it.
21031 12 mov R7, FP Get hold of the current frame pointer.
21032 14 str R7, [SP, #4] Store it.
21033 16 mov R7, LR Get hold of the current return address.
21034 18 str R7, [SP, #12] Store it.
21035 20 add R7, SP, #16 Point at the start of the backtrace structure.
21036 22 mov FP, R7 Put this value into the frame pointer. */
21038 work_register = thumb_find_work_register (live_regs_mask);
21040 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21041 asm_fprintf (f, "\t.pad #16\n");
21044 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21045 SP_REGNUM, SP_REGNUM);
21047 if (dwarf2out_do_frame ())
21049 char *l = dwarf2out_cfi_label (false);
21051 cfa_offset = cfa_offset + 16;
21052 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21057 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21058 offset = bit_count (l_mask) * UNITS_PER_WORD;
21063 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21064 offset + 16 + crtl->args.pretend_args_size);
21066 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21069 /* Make sure that the instruction fetching the PC is in the right place
21070 to calculate "start of backtrace creation code + 12". */
21073 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21074 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21076 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21077 ARM_HARD_FRAME_POINTER_REGNUM);
21078 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21083 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21084 ARM_HARD_FRAME_POINTER_REGNUM);
21085 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21087 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21088 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21092 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21093 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21095 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21097 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21098 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21100 /* Optimization: If we are not pushing any low registers but we are going
21101 to push some high registers then delay our first push. This will just
21102 be a push of LR and we can combine it with the push of the first high
21104 else if ((l_mask & 0xff) != 0
21105 || (high_regs_pushed == 0 && l_mask))
21107 unsigned long mask = l_mask;
21108 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21109 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21112 if (high_regs_pushed)
21114 unsigned pushable_regs;
21115 unsigned next_hi_reg;
21117 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21118 if (live_regs_mask & (1 << next_hi_reg))
21121 pushable_regs = l_mask & 0xff;
21123 if (pushable_regs == 0)
21124 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21126 while (high_regs_pushed > 0)
21128 unsigned long real_regs_mask = 0;
21130 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21132 if (pushable_regs & (1 << regno))
21134 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21136 high_regs_pushed --;
21137 real_regs_mask |= (1 << next_hi_reg);
21139 if (high_regs_pushed)
21141 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21143 if (live_regs_mask & (1 << next_hi_reg))
21148 pushable_regs &= ~((1 << regno) - 1);
21154 /* If we had to find a work register and we have not yet
21155 saved the LR then add it to the list of regs to push. */
21156 if (l_mask == (1 << LR_REGNUM))
21158 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21160 real_regs_mask | (1 << LR_REGNUM));
21164 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21169 /* Handle the case of a double word load into a low register from
21170 a computed memory address. The computed address may involve a
21171 register which is overwritten by the load. */
21173 thumb_load_double_from_address (rtx *operands)
21181 gcc_assert (GET_CODE (operands[0]) == REG);
21182 gcc_assert (GET_CODE (operands[1]) == MEM);
21184 /* Get the memory address. */
21185 addr = XEXP (operands[1], 0);
21187 /* Work out how the memory address is computed. */
21188 switch (GET_CODE (addr))
21191 operands[2] = adjust_address (operands[1], SImode, 4);
21193 if (REGNO (operands[0]) == REGNO (addr))
21195 output_asm_insn ("ldr\t%H0, %2", operands);
21196 output_asm_insn ("ldr\t%0, %1", operands);
21200 output_asm_insn ("ldr\t%0, %1", operands);
21201 output_asm_insn ("ldr\t%H0, %2", operands);
21206 /* Compute <address> + 4 for the high order load. */
21207 operands[2] = adjust_address (operands[1], SImode, 4);
21209 output_asm_insn ("ldr\t%0, %1", operands);
21210 output_asm_insn ("ldr\t%H0, %2", operands);
21214 arg1 = XEXP (addr, 0);
21215 arg2 = XEXP (addr, 1);
21217 if (CONSTANT_P (arg1))
21218 base = arg2, offset = arg1;
21220 base = arg1, offset = arg2;
21222 gcc_assert (GET_CODE (base) == REG);
21224 /* Catch the case of <address> = <reg> + <reg> */
21225 if (GET_CODE (offset) == REG)
21227 int reg_offset = REGNO (offset);
21228 int reg_base = REGNO (base);
21229 int reg_dest = REGNO (operands[0]);
21231 /* Add the base and offset registers together into the
21232 higher destination register. */
21233 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21234 reg_dest + 1, reg_base, reg_offset);
21236 /* Load the lower destination register from the address in
21237 the higher destination register. */
21238 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21239 reg_dest, reg_dest + 1);
21241 /* Load the higher destination register from its own address
21243 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21244 reg_dest + 1, reg_dest + 1);
21248 /* Compute <address> + 4 for the high order load. */
21249 operands[2] = adjust_address (operands[1], SImode, 4);
21251 /* If the computed address is held in the low order register
21252 then load the high order register first, otherwise always
21253 load the low order register first. */
21254 if (REGNO (operands[0]) == REGNO (base))
21256 output_asm_insn ("ldr\t%H0, %2", operands);
21257 output_asm_insn ("ldr\t%0, %1", operands);
21261 output_asm_insn ("ldr\t%0, %1", operands);
21262 output_asm_insn ("ldr\t%H0, %2", operands);
21268 /* With no registers to worry about we can just load the value
21270 operands[2] = adjust_address (operands[1], SImode, 4);
21272 output_asm_insn ("ldr\t%H0, %2", operands);
21273 output_asm_insn ("ldr\t%0, %1", operands);
21277 gcc_unreachable ();
21284 thumb_output_move_mem_multiple (int n, rtx *operands)
21291 if (REGNO (operands[4]) > REGNO (operands[5]))
21294 operands[4] = operands[5];
21297 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21298 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21302 if (REGNO (operands[4]) > REGNO (operands[5]))
21305 operands[4] = operands[5];
21308 if (REGNO (operands[5]) > REGNO (operands[6]))
21311 operands[5] = operands[6];
21314 if (REGNO (operands[4]) > REGNO (operands[5]))
21317 operands[4] = operands[5];
21321 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21322 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21326 gcc_unreachable ();
21332 /* Output a call-via instruction for thumb state. */
21334 thumb_call_via_reg (rtx reg)
21336 int regno = REGNO (reg);
21339 gcc_assert (regno < LR_REGNUM);
21341 /* If we are in the normal text section we can use a single instance
21342 per compilation unit. If we are doing function sections, then we need
21343 an entry per section, since we can't rely on reachability. */
21344 if (in_section == text_section)
21346 thumb_call_reg_needed = 1;
21348 if (thumb_call_via_label[regno] == NULL)
21349 thumb_call_via_label[regno] = gen_label_rtx ();
21350 labelp = thumb_call_via_label + regno;
21354 if (cfun->machine->call_via[regno] == NULL)
21355 cfun->machine->call_via[regno] = gen_label_rtx ();
21356 labelp = cfun->machine->call_via + regno;
21359 output_asm_insn ("bl\t%a0", labelp);
21363 /* Routines for generating rtl. */
21365 thumb_expand_movmemqi (rtx *operands)
21367 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21368 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21369 HOST_WIDE_INT len = INTVAL (operands[2]);
21370 HOST_WIDE_INT offset = 0;
21374 emit_insn (gen_movmem12b (out, in, out, in));
21380 emit_insn (gen_movmem8b (out, in, out, in));
21386 rtx reg = gen_reg_rtx (SImode);
21387 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21388 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21395 rtx reg = gen_reg_rtx (HImode);
21396 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21397 plus_constant (in, offset))));
21398 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21406 rtx reg = gen_reg_rtx (QImode);
21407 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21408 plus_constant (in, offset))));
21409 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21415 thumb_reload_out_hi (rtx *operands)
21417 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21420 /* Handle reading a half-word from memory during reload. */
21422 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21424 gcc_unreachable ();
21427 /* Return the length of a function name prefix
21428 that starts with the character 'c'. */
21430 arm_get_strip_length (int c)
21434 ARM_NAME_ENCODING_LENGTHS
21439 /* Return a pointer to a function's name with any
21440 and all prefix encodings stripped from it. */
21442 arm_strip_name_encoding (const char *name)
21446 while ((skip = arm_get_strip_length (* name)))
21452 /* If there is a '*' anywhere in the name's prefix, then
21453 emit the stripped name verbatim, otherwise prepend an
21454 underscore if leading underscores are being used. */
21456 arm_asm_output_labelref (FILE *stream, const char *name)
21461 while ((skip = arm_get_strip_length (* name)))
21463 verbatim |= (*name == '*');
21468 fputs (name, stream);
21470 asm_fprintf (stream, "%U%s", name);
21474 arm_file_start (void)
21478 if (TARGET_UNIFIED_ASM)
21479 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21483 const char *fpu_name;
21484 if (arm_selected_arch)
21485 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21487 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21489 if (TARGET_SOFT_FLOAT)
21492 fpu_name = "softvfp";
21494 fpu_name = "softfpa";
21498 fpu_name = arm_fpu_desc->name;
21499 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21501 if (TARGET_HARD_FLOAT)
21502 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21503 if (TARGET_HARD_FLOAT_ABI)
21504 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21507 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21509 /* Some of these attributes only apply when the corresponding features
21510 are used. However we don't have any easy way of figuring this out.
21511 Conservatively record the setting that would have been used. */
21513 /* Tag_ABI_FP_rounding. */
21514 if (flag_rounding_math)
21515 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21516 if (!flag_unsafe_math_optimizations)
21518 /* Tag_ABI_FP_denomal. */
21519 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21520 /* Tag_ABI_FP_exceptions. */
21521 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21523 /* Tag_ABI_FP_user_exceptions. */
21524 if (flag_signaling_nans)
21525 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21526 /* Tag_ABI_FP_number_model. */
21527 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21528 flag_finite_math_only ? 1 : 3);
21530 /* Tag_ABI_align8_needed. */
21531 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21532 /* Tag_ABI_align8_preserved. */
21533 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21534 /* Tag_ABI_enum_size. */
21535 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21536 flag_short_enums ? 1 : 2);
21538 /* Tag_ABI_optimization_goals. */
21541 else if (optimize >= 2)
21547 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21549 /* Tag_ABI_FP_16bit_format. */
21550 if (arm_fp16_format)
21551 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21552 (int)arm_fp16_format);
21554 if (arm_lang_output_object_attributes_hook)
21555 arm_lang_output_object_attributes_hook();
21557 default_file_start();
21561 arm_file_end (void)
21565 if (NEED_INDICATE_EXEC_STACK)
21566 /* Add .note.GNU-stack. */
21567 file_end_indicate_exec_stack ();
21569 if (! thumb_call_reg_needed)
21572 switch_to_section (text_section);
21573 asm_fprintf (asm_out_file, "\t.code 16\n");
21574 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21576 for (regno = 0; regno < LR_REGNUM; regno++)
21578 rtx label = thumb_call_via_label[regno];
21582 targetm.asm_out.internal_label (asm_out_file, "L",
21583 CODE_LABEL_NUMBER (label));
21584 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21590 /* Symbols in the text segment can be accessed without indirecting via the
21591 constant pool; it may take an extra binary operation, but this is still
21592 faster than indirecting via memory. Don't do this when not optimizing,
21593 since we won't be calculating al of the offsets necessary to do this
21597 arm_encode_section_info (tree decl, rtx rtl, int first)
21599 if (optimize > 0 && TREE_CONSTANT (decl))
21600 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21602 default_encode_section_info (decl, rtl, first);
21604 #endif /* !ARM_PE */
21607 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21609 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21610 && !strcmp (prefix, "L"))
21612 arm_ccfsm_state = 0;
21613 arm_target_insn = NULL;
21615 default_internal_label (stream, prefix, labelno);
21618 /* Output code to add DELTA to the first argument, and then jump
21619 to FUNCTION. Used for C++ multiple inheritance. */
21621 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21622 HOST_WIDE_INT delta,
21623 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21626 static int thunk_label = 0;
21629 int mi_delta = delta;
21630 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21632 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21635 mi_delta = - mi_delta;
21639 int labelno = thunk_label++;
21640 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21641 /* Thunks are entered in arm mode when avaiable. */
21642 if (TARGET_THUMB1_ONLY)
21644 /* push r3 so we can use it as a temporary. */
21645 /* TODO: Omit this save if r3 is not used. */
21646 fputs ("\tpush {r3}\n", file);
21647 fputs ("\tldr\tr3, ", file);
21651 fputs ("\tldr\tr12, ", file);
21653 assemble_name (file, label);
21654 fputc ('\n', file);
21657 /* If we are generating PIC, the ldr instruction below loads
21658 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21659 the address of the add + 8, so we have:
21661 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21664 Note that we have "+ 1" because some versions of GNU ld
21665 don't set the low bit of the result for R_ARM_REL32
21666 relocations against thumb function symbols.
21667 On ARMv6M this is +4, not +8. */
21668 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21669 assemble_name (file, labelpc);
21670 fputs (":\n", file);
21671 if (TARGET_THUMB1_ONLY)
21673 /* This is 2 insns after the start of the thunk, so we know it
21674 is 4-byte aligned. */
21675 fputs ("\tadd\tr3, pc, r3\n", file);
21676 fputs ("\tmov r12, r3\n", file);
21679 fputs ("\tadd\tr12, pc, r12\n", file);
21681 else if (TARGET_THUMB1_ONLY)
21682 fputs ("\tmov r12, r3\n", file);
21684 if (TARGET_THUMB1_ONLY)
21686 if (mi_delta > 255)
21688 fputs ("\tldr\tr3, ", file);
21689 assemble_name (file, label);
21690 fputs ("+4\n", file);
21691 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21692 mi_op, this_regno, this_regno);
21694 else if (mi_delta != 0)
21696 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21697 mi_op, this_regno, this_regno,
21703 /* TODO: Use movw/movt for large constants when available. */
21704 while (mi_delta != 0)
21706 if ((mi_delta & (3 << shift)) == 0)
21710 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21711 mi_op, this_regno, this_regno,
21712 mi_delta & (0xff << shift));
21713 mi_delta &= ~(0xff << shift);
21720 if (TARGET_THUMB1_ONLY)
21721 fputs ("\tpop\t{r3}\n", file);
21723 fprintf (file, "\tbx\tr12\n");
21724 ASM_OUTPUT_ALIGN (file, 2);
21725 assemble_name (file, label);
21726 fputs (":\n", file);
21729 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21730 rtx tem = XEXP (DECL_RTL (function), 0);
21731 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21732 tem = gen_rtx_MINUS (GET_MODE (tem),
21734 gen_rtx_SYMBOL_REF (Pmode,
21735 ggc_strdup (labelpc)));
21736 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21739 /* Output ".word .LTHUNKn". */
21740 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21742 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21743 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21747 fputs ("\tb\t", file);
21748 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21749 if (NEED_PLT_RELOC)
21750 fputs ("(PLT)", file);
21751 fputc ('\n', file);
21756 arm_emit_vector_const (FILE *file, rtx x)
21759 const char * pattern;
21761 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21763 switch (GET_MODE (x))
21765 case V2SImode: pattern = "%08x"; break;
21766 case V4HImode: pattern = "%04x"; break;
21767 case V8QImode: pattern = "%02x"; break;
21768 default: gcc_unreachable ();
21771 fprintf (file, "0x");
21772 for (i = CONST_VECTOR_NUNITS (x); i--;)
21776 element = CONST_VECTOR_ELT (x, i);
21777 fprintf (file, pattern, INTVAL (element));
21783 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21784 HFmode constant pool entries are actually loaded with ldr. */
21786 arm_emit_fp16_const (rtx c)
21791 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21792 bits = real_to_target (NULL, &r, HFmode);
21793 if (WORDS_BIG_ENDIAN)
21794 assemble_zeros (2);
21795 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21796 if (!WORDS_BIG_ENDIAN)
21797 assemble_zeros (2);
21801 arm_output_load_gr (rtx *operands)
21808 if (GET_CODE (operands [1]) != MEM
21809 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21810 || GET_CODE (reg = XEXP (sum, 0)) != REG
21811 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21812 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21813 return "wldrw%?\t%0, %1";
21815 /* Fix up an out-of-range load of a GR register. */
21816 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21817 wcgr = operands[0];
21819 output_asm_insn ("ldr%?\t%0, %1", operands);
21821 operands[0] = wcgr;
21823 output_asm_insn ("tmcr%?\t%0, %1", operands);
21824 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21829 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21831 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21832 named arg and all anonymous args onto the stack.
21833 XXX I know the prologue shouldn't be pushing registers, but it is faster
21837 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21838 enum machine_mode mode,
21841 int second_time ATTRIBUTE_UNUSED)
21845 cfun->machine->uses_anonymous_args = 1;
21846 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21848 nregs = pcum->aapcs_ncrn;
21849 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21853 nregs = pcum->nregs;
21855 if (nregs < NUM_ARG_REGS)
21856 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21859 /* Return nonzero if the CONSUMER instruction (a store) does not need
21860 PRODUCER's value to calculate the address. */
21863 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21865 rtx value = PATTERN (producer);
21866 rtx addr = PATTERN (consumer);
21868 if (GET_CODE (value) == COND_EXEC)
21869 value = COND_EXEC_CODE (value);
21870 if (GET_CODE (value) == PARALLEL)
21871 value = XVECEXP (value, 0, 0);
21872 value = XEXP (value, 0);
21873 if (GET_CODE (addr) == COND_EXEC)
21874 addr = COND_EXEC_CODE (addr);
21875 if (GET_CODE (addr) == PARALLEL)
21876 addr = XVECEXP (addr, 0, 0);
21877 addr = XEXP (addr, 0);
21879 return !reg_overlap_mentioned_p (value, addr);
21882 /* Return nonzero if the CONSUMER instruction (a store) does need
21883 PRODUCER's value to calculate the address. */
21886 arm_early_store_addr_dep (rtx producer, rtx consumer)
21888 return !arm_no_early_store_addr_dep (producer, consumer);
21891 /* Return nonzero if the CONSUMER instruction (a load) does need
21892 PRODUCER's value to calculate the address. */
21895 arm_early_load_addr_dep (rtx producer, rtx consumer)
21897 rtx value = PATTERN (producer);
21898 rtx addr = PATTERN (consumer);
21900 if (GET_CODE (value) == COND_EXEC)
21901 value = COND_EXEC_CODE (value);
21902 if (GET_CODE (value) == PARALLEL)
21903 value = XVECEXP (value, 0, 0);
21904 value = XEXP (value, 0);
21905 if (GET_CODE (addr) == COND_EXEC)
21906 addr = COND_EXEC_CODE (addr);
21907 if (GET_CODE (addr) == PARALLEL)
21908 addr = XVECEXP (addr, 0, 0);
21909 addr = XEXP (addr, 1);
21911 return reg_overlap_mentioned_p (value, addr);
21914 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21915 have an early register shift value or amount dependency on the
21916 result of PRODUCER. */
21919 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21921 rtx value = PATTERN (producer);
21922 rtx op = PATTERN (consumer);
21925 if (GET_CODE (value) == COND_EXEC)
21926 value = COND_EXEC_CODE (value);
21927 if (GET_CODE (value) == PARALLEL)
21928 value = XVECEXP (value, 0, 0);
21929 value = XEXP (value, 0);
21930 if (GET_CODE (op) == COND_EXEC)
21931 op = COND_EXEC_CODE (op);
21932 if (GET_CODE (op) == PARALLEL)
21933 op = XVECEXP (op, 0, 0);
21936 early_op = XEXP (op, 0);
21937 /* This is either an actual independent shift, or a shift applied to
21938 the first operand of another operation. We want the whole shift
21940 if (GET_CODE (early_op) == REG)
21943 return !reg_overlap_mentioned_p (value, early_op);
21946 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21947 have an early register shift value dependency on the result of
21951 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21953 rtx value = PATTERN (producer);
21954 rtx op = PATTERN (consumer);
21957 if (GET_CODE (value) == COND_EXEC)
21958 value = COND_EXEC_CODE (value);
21959 if (GET_CODE (value) == PARALLEL)
21960 value = XVECEXP (value, 0, 0);
21961 value = XEXP (value, 0);
21962 if (GET_CODE (op) == COND_EXEC)
21963 op = COND_EXEC_CODE (op);
21964 if (GET_CODE (op) == PARALLEL)
21965 op = XVECEXP (op, 0, 0);
21968 early_op = XEXP (op, 0);
21970 /* This is either an actual independent shift, or a shift applied to
21971 the first operand of another operation. We want the value being
21972 shifted, in either case. */
21973 if (GET_CODE (early_op) != REG)
21974 early_op = XEXP (early_op, 0);
21976 return !reg_overlap_mentioned_p (value, early_op);
21979 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21980 have an early register mult dependency on the result of
21984 arm_no_early_mul_dep (rtx producer, rtx consumer)
21986 rtx value = PATTERN (producer);
21987 rtx op = PATTERN (consumer);
21989 if (GET_CODE (value) == COND_EXEC)
21990 value = COND_EXEC_CODE (value);
21991 if (GET_CODE (value) == PARALLEL)
21992 value = XVECEXP (value, 0, 0);
21993 value = XEXP (value, 0);
21994 if (GET_CODE (op) == COND_EXEC)
21995 op = COND_EXEC_CODE (op);
21996 if (GET_CODE (op) == PARALLEL)
21997 op = XVECEXP (op, 0, 0);
22000 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22002 if (GET_CODE (XEXP (op, 0)) == MULT)
22003 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22005 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22011 /* We can't rely on the caller doing the proper promotion when
22012 using APCS or ATPCS. */
22015 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22017 return !TARGET_AAPCS_BASED;
22020 static enum machine_mode
22021 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22022 enum machine_mode mode,
22023 int *punsignedp ATTRIBUTE_UNUSED,
22024 const_tree fntype ATTRIBUTE_UNUSED,
22025 int for_return ATTRIBUTE_UNUSED)
22027 if (GET_MODE_CLASS (mode) == MODE_INT
22028 && GET_MODE_SIZE (mode) < 4)
22034 /* AAPCS based ABIs use short enums by default. */
22037 arm_default_short_enums (void)
22039 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22043 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22046 arm_align_anon_bitfield (void)
22048 return TARGET_AAPCS_BASED;
22052 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22055 arm_cxx_guard_type (void)
22057 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22060 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22061 has an accumulator dependency on the result of the producer (a
22062 multiplication instruction) and no other dependency on that result. */
22064 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22066 rtx mul = PATTERN (producer);
22067 rtx mac = PATTERN (consumer);
22069 rtx mac_op0, mac_op1, mac_acc;
22071 if (GET_CODE (mul) == COND_EXEC)
22072 mul = COND_EXEC_CODE (mul);
22073 if (GET_CODE (mac) == COND_EXEC)
22074 mac = COND_EXEC_CODE (mac);
22076 /* Check that mul is of the form (set (...) (mult ...))
22077 and mla is of the form (set (...) (plus (mult ...) (...))). */
22078 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22079 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22080 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22083 mul_result = XEXP (mul, 0);
22084 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22085 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22086 mac_acc = XEXP (XEXP (mac, 1), 1);
22088 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22089 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22090 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22094 /* The EABI says test the least significant bit of a guard variable. */
22097 arm_cxx_guard_mask_bit (void)
22099 return TARGET_AAPCS_BASED;
22103 /* The EABI specifies that all array cookies are 8 bytes long. */
22106 arm_get_cookie_size (tree type)
22110 if (!TARGET_AAPCS_BASED)
22111 return default_cxx_get_cookie_size (type);
22113 size = build_int_cst (sizetype, 8);
22118 /* The EABI says that array cookies should also contain the element size. */
22121 arm_cookie_has_size (void)
22123 return TARGET_AAPCS_BASED;
22127 /* The EABI says constructors and destructors should return a pointer to
22128 the object constructed/destroyed. */
22131 arm_cxx_cdtor_returns_this (void)
22133 return TARGET_AAPCS_BASED;
22136 /* The EABI says that an inline function may never be the key
22140 arm_cxx_key_method_may_be_inline (void)
22142 return !TARGET_AAPCS_BASED;
22146 arm_cxx_determine_class_data_visibility (tree decl)
22148 if (!TARGET_AAPCS_BASED
22149 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22152 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22153 is exported. However, on systems without dynamic vague linkage,
22154 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22155 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22156 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22158 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22159 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22163 arm_cxx_class_data_always_comdat (void)
22165 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22166 vague linkage if the class has no key function. */
22167 return !TARGET_AAPCS_BASED;
22171 /* The EABI says __aeabi_atexit should be used to register static
22175 arm_cxx_use_aeabi_atexit (void)
22177 return TARGET_AAPCS_BASED;
22182 arm_set_return_address (rtx source, rtx scratch)
22184 arm_stack_offsets *offsets;
22185 HOST_WIDE_INT delta;
22187 unsigned long saved_regs;
22189 offsets = arm_get_frame_offsets ();
22190 saved_regs = offsets->saved_regs_mask;
22192 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22193 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22196 if (frame_pointer_needed)
22197 addr = plus_constant(hard_frame_pointer_rtx, -4);
22200 /* LR will be the first saved register. */
22201 delta = offsets->outgoing_args - (offsets->frame + 4);
22206 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22207 GEN_INT (delta & ~4095)));
22212 addr = stack_pointer_rtx;
22214 addr = plus_constant (addr, delta);
22216 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22222 thumb_set_return_address (rtx source, rtx scratch)
22224 arm_stack_offsets *offsets;
22225 HOST_WIDE_INT delta;
22226 HOST_WIDE_INT limit;
22229 unsigned long mask;
22233 offsets = arm_get_frame_offsets ();
22234 mask = offsets->saved_regs_mask;
22235 if (mask & (1 << LR_REGNUM))
22238 /* Find the saved regs. */
22239 if (frame_pointer_needed)
22241 delta = offsets->soft_frame - offsets->saved_args;
22242 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22248 delta = offsets->outgoing_args - offsets->saved_args;
22251 /* Allow for the stack frame. */
22252 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22254 /* The link register is always the first saved register. */
22257 /* Construct the address. */
22258 addr = gen_rtx_REG (SImode, reg);
22261 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22262 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22266 addr = plus_constant (addr, delta);
22268 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22271 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22274 /* Implements target hook vector_mode_supported_p. */
22276 arm_vector_mode_supported_p (enum machine_mode mode)
22278 /* Neon also supports V2SImode, etc. listed in the clause below. */
22279 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22280 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22283 if ((TARGET_NEON || TARGET_IWMMXT)
22284 && ((mode == V2SImode)
22285 || (mode == V4HImode)
22286 || (mode == V8QImode)))
22292 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22293 registers when autovectorizing for Neon, at least until multiple vector
22294 widths are supported properly by the middle-end. */
22296 static enum machine_mode
22297 arm_preferred_simd_mode (enum machine_mode mode)
22303 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22305 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22307 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22309 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22311 if (TARGET_NEON_VECTORIZE_QUAD)
22318 if (TARGET_REALLY_IWMMXT)
22334 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22336 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22337 using r0-r4 for function arguments, r7 for the stack frame and don't have
22338 enough left over to do doubleword arithmetic. For Thumb-2 all the
22339 potentially problematic instructions accept high registers so this is not
22340 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22341 that require many low registers. */
22343 arm_class_likely_spilled_p (reg_class_t rclass)
22345 if ((TARGET_THUMB1 && rclass == LO_REGS)
22346 || rclass == CC_REG)
22352 /* Implements target hook small_register_classes_for_mode_p. */
22354 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22356 return TARGET_THUMB1;
22359 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22360 ARM insns and therefore guarantee that the shift count is modulo 256.
22361 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22362 guarantee no particular behavior for out-of-range counts. */
22364 static unsigned HOST_WIDE_INT
22365 arm_shift_truncation_mask (enum machine_mode mode)
22367 return mode == SImode ? 255 : 0;
22371 /* Map internal gcc register numbers to DWARF2 register numbers. */
22374 arm_dbx_register_number (unsigned int regno)
22379 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22380 compatibility. The EABI defines them as registers 96-103. */
22381 if (IS_FPA_REGNUM (regno))
22382 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22384 if (IS_VFP_REGNUM (regno))
22386 /* See comment in arm_dwarf_register_span. */
22387 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22388 return 64 + regno - FIRST_VFP_REGNUM;
22390 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22393 if (IS_IWMMXT_GR_REGNUM (regno))
22394 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22396 if (IS_IWMMXT_REGNUM (regno))
22397 return 112 + regno - FIRST_IWMMXT_REGNUM;
22399 gcc_unreachable ();
22402 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22403 GCC models tham as 64 32-bit registers, so we need to describe this to
22404 the DWARF generation code. Other registers can use the default. */
22406 arm_dwarf_register_span (rtx rtl)
22413 regno = REGNO (rtl);
22414 if (!IS_VFP_REGNUM (regno))
22417 /* XXX FIXME: The EABI defines two VFP register ranges:
22418 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22420 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22421 corresponding D register. Until GDB supports this, we shall use the
22422 legacy encodings. We also use these encodings for D0-D15 for
22423 compatibility with older debuggers. */
22424 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22427 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22428 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22429 regno = (regno - FIRST_VFP_REGNUM) / 2;
22430 for (i = 0; i < nregs; i++)
22431 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22436 #if ARM_UNWIND_INFO
22437 /* Emit unwind directives for a store-multiple instruction or stack pointer
22438 push during alignment.
22439 These should only ever be generated by the function prologue code, so
22440 expect them to have a particular form. */
22443 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22446 HOST_WIDE_INT offset;
22447 HOST_WIDE_INT nregs;
22453 e = XVECEXP (p, 0, 0);
22454 if (GET_CODE (e) != SET)
22457 /* First insn will adjust the stack pointer. */
22458 if (GET_CODE (e) != SET
22459 || GET_CODE (XEXP (e, 0)) != REG
22460 || REGNO (XEXP (e, 0)) != SP_REGNUM
22461 || GET_CODE (XEXP (e, 1)) != PLUS)
22464 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22465 nregs = XVECLEN (p, 0) - 1;
22467 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22470 /* The function prologue may also push pc, but not annotate it as it is
22471 never restored. We turn this into a stack pointer adjustment. */
22472 if (nregs * 4 == offset - 4)
22474 fprintf (asm_out_file, "\t.pad #4\n");
22478 fprintf (asm_out_file, "\t.save {");
22480 else if (IS_VFP_REGNUM (reg))
22483 fprintf (asm_out_file, "\t.vsave {");
22485 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22487 /* FPA registers are done differently. */
22488 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22492 /* Unknown register type. */
22495 /* If the stack increment doesn't match the size of the saved registers,
22496 something has gone horribly wrong. */
22497 if (offset != nregs * reg_size)
22502 /* The remaining insns will describe the stores. */
22503 for (i = 1; i <= nregs; i++)
22505 /* Expect (set (mem <addr>) (reg)).
22506 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22507 e = XVECEXP (p, 0, i);
22508 if (GET_CODE (e) != SET
22509 || GET_CODE (XEXP (e, 0)) != MEM
22510 || GET_CODE (XEXP (e, 1)) != REG)
22513 reg = REGNO (XEXP (e, 1));
22518 fprintf (asm_out_file, ", ");
22519 /* We can't use %r for vfp because we need to use the
22520 double precision register names. */
22521 if (IS_VFP_REGNUM (reg))
22522 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22524 asm_fprintf (asm_out_file, "%r", reg);
22526 #ifdef ENABLE_CHECKING
22527 /* Check that the addresses are consecutive. */
22528 e = XEXP (XEXP (e, 0), 0);
22529 if (GET_CODE (e) == PLUS)
22531 offset += reg_size;
22532 if (GET_CODE (XEXP (e, 0)) != REG
22533 || REGNO (XEXP (e, 0)) != SP_REGNUM
22534 || GET_CODE (XEXP (e, 1)) != CONST_INT
22535 || offset != INTVAL (XEXP (e, 1)))
22539 || GET_CODE (e) != REG
22540 || REGNO (e) != SP_REGNUM)
22544 fprintf (asm_out_file, "}\n");
22547 /* Emit unwind directives for a SET. */
22550 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22558 switch (GET_CODE (e0))
22561 /* Pushing a single register. */
22562 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22563 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22564 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22567 asm_fprintf (asm_out_file, "\t.save ");
22568 if (IS_VFP_REGNUM (REGNO (e1)))
22569 asm_fprintf(asm_out_file, "{d%d}\n",
22570 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22572 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22576 if (REGNO (e0) == SP_REGNUM)
22578 /* A stack increment. */
22579 if (GET_CODE (e1) != PLUS
22580 || GET_CODE (XEXP (e1, 0)) != REG
22581 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22582 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22585 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22586 -INTVAL (XEXP (e1, 1)));
22588 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22590 HOST_WIDE_INT offset;
22592 if (GET_CODE (e1) == PLUS)
22594 if (GET_CODE (XEXP (e1, 0)) != REG
22595 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22597 reg = REGNO (XEXP (e1, 0));
22598 offset = INTVAL (XEXP (e1, 1));
22599 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22600 HARD_FRAME_POINTER_REGNUM, reg,
22603 else if (GET_CODE (e1) == REG)
22606 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22607 HARD_FRAME_POINTER_REGNUM, reg);
22612 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22614 /* Move from sp to reg. */
22615 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22617 else if (GET_CODE (e1) == PLUS
22618 && GET_CODE (XEXP (e1, 0)) == REG
22619 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22620 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22622 /* Set reg to offset from sp. */
22623 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22624 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22626 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22628 /* Stack pointer save before alignment. */
22630 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22643 /* Emit unwind directives for the given insn. */
22646 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22650 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22653 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22654 && (TREE_NOTHROW (current_function_decl)
22655 || crtl->all_throwers_are_sibcalls))
22658 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22661 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22663 pat = XEXP (pat, 0);
22665 pat = PATTERN (insn);
22667 switch (GET_CODE (pat))
22670 arm_unwind_emit_set (asm_out_file, pat);
22674 /* Store multiple. */
22675 arm_unwind_emit_sequence (asm_out_file, pat);
22684 /* Output a reference from a function exception table to the type_info
22685 object X. The EABI specifies that the symbol should be relocated by
22686 an R_ARM_TARGET2 relocation. */
22689 arm_output_ttype (rtx x)
22691 fputs ("\t.word\t", asm_out_file);
22692 output_addr_const (asm_out_file, x);
22693 /* Use special relocations for symbol references. */
22694 if (GET_CODE (x) != CONST_INT)
22695 fputs ("(TARGET2)", asm_out_file);
22696 fputc ('\n', asm_out_file);
22701 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22704 arm_asm_emit_except_personality (rtx personality)
22706 fputs ("\t.personality\t", asm_out_file);
22707 output_addr_const (asm_out_file, personality);
22708 fputc ('\n', asm_out_file);
22711 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22714 arm_asm_init_sections (void)
22716 exception_section = get_unnamed_section (0, output_section_asm_op,
22719 #endif /* ARM_UNWIND_INFO */
22721 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22723 static enum unwind_info_type
22724 arm_except_unwind_info (struct gcc_options *opts)
22726 /* Honor the --enable-sjlj-exceptions configure switch. */
22727 #ifdef CONFIG_SJLJ_EXCEPTIONS
22728 if (CONFIG_SJLJ_EXCEPTIONS)
22732 /* If not using ARM EABI unwind tables... */
22733 if (ARM_UNWIND_INFO)
22735 /* For simplicity elsewhere in this file, indicate that all unwind
22736 info is disabled if we're not emitting unwind tables. */
22737 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22743 /* ... we use sjlj exceptions for backwards compatibility. */
22748 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22749 stack alignment. */
22752 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22754 rtx unspec = SET_SRC (pattern);
22755 gcc_assert (GET_CODE (unspec) == UNSPEC);
22759 case UNSPEC_STACK_ALIGN:
22760 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22761 put anything on the stack, so hopefully it won't matter.
22762 CFA = SP will be correct after alignment. */
22763 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22764 SET_DEST (pattern));
22767 gcc_unreachable ();
22772 /* Output unwind directives for the start/end of a function. */
22775 arm_output_fn_unwind (FILE * f, bool prologue)
22777 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22781 fputs ("\t.fnstart\n", f);
22784 /* If this function will never be unwound, then mark it as such.
22785 The came condition is used in arm_unwind_emit to suppress
22786 the frame annotations. */
22787 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22788 && (TREE_NOTHROW (current_function_decl)
22789 || crtl->all_throwers_are_sibcalls))
22790 fputs("\t.cantunwind\n", f);
22792 fputs ("\t.fnend\n", f);
22797 arm_emit_tls_decoration (FILE *fp, rtx x)
22799 enum tls_reloc reloc;
22802 val = XVECEXP (x, 0, 0);
22803 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22805 output_addr_const (fp, val);
22810 fputs ("(tlsgd)", fp);
22813 fputs ("(tlsldm)", fp);
22816 fputs ("(tlsldo)", fp);
22819 fputs ("(gottpoff)", fp);
22822 fputs ("(tpoff)", fp);
22825 gcc_unreachable ();
22833 fputs (" + (. - ", fp);
22834 output_addr_const (fp, XVECEXP (x, 0, 2));
22836 output_addr_const (fp, XVECEXP (x, 0, 3));
22846 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22849 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22851 gcc_assert (size == 4);
22852 fputs ("\t.word\t", file);
22853 output_addr_const (file, x);
22854 fputs ("(tlsldo)", file);
22857 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22860 arm_output_addr_const_extra (FILE *fp, rtx x)
22862 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22863 return arm_emit_tls_decoration (fp, x);
22864 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22867 int labelno = INTVAL (XVECEXP (x, 0, 0));
22869 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22870 assemble_name_raw (fp, label);
22874 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22876 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22880 output_addr_const (fp, XVECEXP (x, 0, 0));
22884 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22886 output_addr_const (fp, XVECEXP (x, 0, 0));
22890 output_addr_const (fp, XVECEXP (x, 0, 1));
22894 else if (GET_CODE (x) == CONST_VECTOR)
22895 return arm_emit_vector_const (fp, x);
22900 /* Output assembly for a shift instruction.
22901 SET_FLAGS determines how the instruction modifies the condition codes.
22902 0 - Do not set condition codes.
22903 1 - Set condition codes.
22904 2 - Use smallest instruction. */
22906 arm_output_shift(rtx * operands, int set_flags)
22909 static const char flag_chars[3] = {'?', '.', '!'};
22914 c = flag_chars[set_flags];
22915 if (TARGET_UNIFIED_ASM)
22917 shift = shift_op(operands[3], &val);
22921 operands[2] = GEN_INT(val);
22922 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22925 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22928 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22929 output_asm_insn (pattern, operands);
22933 /* Output a Thumb-1 casesi dispatch sequence. */
22935 thumb1_output_casesi (rtx *operands)
22937 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22939 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22941 switch (GET_MODE(diff_vec))
22944 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22945 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22947 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22948 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22950 return "bl\t%___gnu_thumb1_case_si";
22952 gcc_unreachable ();
22956 /* Output a Thumb-2 casesi instruction. */
22958 thumb2_output_casesi (rtx *operands)
22960 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22962 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22964 output_asm_insn ("cmp\t%0, %1", operands);
22965 output_asm_insn ("bhi\t%l3", operands);
22966 switch (GET_MODE(diff_vec))
22969 return "tbb\t[%|pc, %0]";
22971 return "tbh\t[%|pc, %0, lsl #1]";
22975 output_asm_insn ("adr\t%4, %l2", operands);
22976 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
22977 output_asm_insn ("add\t%4, %4, %5", operands);
22982 output_asm_insn ("adr\t%4, %l2", operands);
22983 return "ldr\t%|pc, [%4, %0, lsl #2]";
22986 gcc_unreachable ();
22990 /* Most ARM cores are single issue, but some newer ones can dual issue.
22991 The scheduler descriptions rely on this being correct. */
22993 arm_issue_rate (void)
23010 /* A table and a function to perform ARM-specific name mangling for
23011 NEON vector types in order to conform to the AAPCS (see "Procedure
23012 Call Standard for the ARM Architecture", Appendix A). To qualify
23013 for emission with the mangled names defined in that document, a
23014 vector type must not only be of the correct mode but also be
23015 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23018 enum machine_mode mode;
23019 const char *element_type_name;
23020 const char *aapcs_name;
23021 } arm_mangle_map_entry;
23023 static arm_mangle_map_entry arm_mangle_map[] = {
23024 /* 64-bit containerized types. */
23025 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23026 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23027 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23028 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23029 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23030 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23031 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23032 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23033 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23034 /* 128-bit containerized types. */
23035 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23036 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23037 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23038 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23039 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23040 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23041 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23042 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23043 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23044 { VOIDmode, NULL, NULL }
23048 arm_mangle_type (const_tree type)
23050 arm_mangle_map_entry *pos = arm_mangle_map;
23052 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23053 has to be managled as if it is in the "std" namespace. */
23054 if (TARGET_AAPCS_BASED
23055 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23057 static bool warned;
23058 if (!warned && warn_psabi && !in_system_header)
23061 inform (input_location,
23062 "the mangling of %<va_list%> has changed in GCC 4.4");
23064 return "St9__va_list";
23067 /* Half-precision float. */
23068 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23071 if (TREE_CODE (type) != VECTOR_TYPE)
23074 /* Check the mode of the vector type, and the name of the vector
23075 element type, against the table. */
23076 while (pos->mode != VOIDmode)
23078 tree elt_type = TREE_TYPE (type);
23080 if (pos->mode == TYPE_MODE (type)
23081 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23082 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23083 pos->element_type_name))
23084 return pos->aapcs_name;
23089 /* Use the default mangling for unrecognized (possibly user-defined)
23094 /* Order of allocation of core registers for Thumb: this allocation is
23095 written over the corresponding initial entries of the array
23096 initialized with REG_ALLOC_ORDER. We allocate all low registers
23097 first. Saving and restoring a low register is usually cheaper than
23098 using a call-clobbered high register. */
23100 static const int thumb_core_reg_alloc_order[] =
23102 3, 2, 1, 0, 4, 5, 6, 7,
23103 14, 12, 8, 9, 10, 11, 13, 15
23106 /* Adjust register allocation order when compiling for Thumb. */
23109 arm_order_regs_for_local_alloc (void)
23111 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23112 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23114 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23115 sizeof (thumb_core_reg_alloc_order));
23118 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23121 arm_frame_pointer_required (void)
23123 return (cfun->has_nonlocal_label
23124 || SUBTARGET_FRAME_POINTER_REQUIRED
23125 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23128 /* Only thumb1 can't support conditional execution, so return true if
23129 the target is not thumb1. */
23131 arm_have_conditional_execution (void)
23133 return !TARGET_THUMB1;
23136 /* Legitimize a memory reference for sync primitive implemented using
23137 ldrex / strex. We currently force the form of the reference to be
23138 indirect without offset. We do not yet support the indirect offset
23139 addressing supported by some ARM targets for these
23142 arm_legitimize_sync_memory (rtx memory)
23144 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23145 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23147 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23148 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23149 return legitimate_memory;
23152 /* An instruction emitter. */
23153 typedef void (* emit_f) (int label, const char *, rtx *);
23155 /* An instruction emitter that emits via the conventional
23156 output_asm_insn. */
23158 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23160 output_asm_insn (pattern, operands);
23163 /* Count the number of emitted synchronization instructions. */
23164 static unsigned arm_insn_count;
23166 /* An emitter that counts emitted instructions but does not actually
23167 emit instruction into the the instruction stream. */
23169 arm_count (int label,
23170 const char *pattern ATTRIBUTE_UNUSED,
23171 rtx *operands ATTRIBUTE_UNUSED)
23177 /* Construct a pattern using conventional output formatting and feed
23178 it to output_asm_insn. Provides a mechanism to construct the
23179 output pattern on the fly. Note the hard limit on the pattern
23181 static void ATTRIBUTE_PRINTF_4
23182 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23183 const char *pattern, ...)
23188 va_start (ap, pattern);
23189 vsprintf (buffer, pattern, ap);
23191 emit (label, buffer, operands);
23194 /* Emit the memory barrier instruction, if any, provided by this
23195 target to a specified emitter. */
23197 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23199 if (TARGET_HAVE_DMB)
23201 /* Note we issue a system level barrier. We should consider
23202 issuing a inner shareabilty zone barrier here instead, ie.
23204 emit (0, "dmb\tsy", operands);
23208 if (TARGET_HAVE_DMB_MCR)
23210 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23214 gcc_unreachable ();
23217 /* Emit the memory barrier instruction, if any, provided by this
23220 arm_output_memory_barrier (rtx *operands)
23222 arm_process_output_memory_barrier (arm_emit, operands);
23226 /* Helper to figure out the instruction suffix required on ldrex/strex
23227 for operations on an object of the specified mode. */
23228 static const char *
23229 arm_ldrex_suffix (enum machine_mode mode)
23233 case QImode: return "b";
23234 case HImode: return "h";
23235 case SImode: return "";
23236 case DImode: return "d";
23238 gcc_unreachable ();
23243 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23246 arm_output_ldrex (emit_f emit,
23247 enum machine_mode mode,
23251 const char *suffix = arm_ldrex_suffix (mode);
23254 operands[0] = target;
23255 operands[1] = memory;
23256 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23259 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23262 arm_output_strex (emit_f emit,
23263 enum machine_mode mode,
23269 const char *suffix = arm_ldrex_suffix (mode);
23272 operands[0] = result;
23273 operands[1] = value;
23274 operands[2] = memory;
23275 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23279 /* Helper to emit a two operand instruction. */
23281 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23287 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23290 /* Helper to emit a three operand instruction. */
23292 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23299 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23302 /* Emit a load store exclusive synchronization loop.
23306 if old_value != required_value
23308 t1 = sync_op (old_value, new_value)
23309 [mem] = t1, t2 = [0|1]
23313 t1 == t2 is not permitted
23314 t1 == old_value is permitted
23318 RTX register or const_int representing the required old_value for
23319 the modify to continue, if NULL no comparsion is performed. */
23321 arm_output_sync_loop (emit_f emit,
23322 enum machine_mode mode,
23325 rtx required_value,
23329 enum attr_sync_op sync_op,
23330 int early_barrier_required)
23334 gcc_assert (t1 != t2);
23336 if (early_barrier_required)
23337 arm_process_output_memory_barrier (emit, NULL);
23339 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23341 arm_output_ldrex (emit, mode, old_value, memory);
23343 if (required_value)
23347 operands[0] = old_value;
23348 operands[1] = required_value;
23349 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23350 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23356 arm_output_op3 (emit, "add", t1, old_value, new_value);
23360 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23364 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23368 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23372 arm_output_op3 (emit,"and", t1, old_value, new_value);
23376 arm_output_op3 (emit, "and", t1, old_value, new_value);
23377 arm_output_op2 (emit, "mvn", t1, t1);
23387 arm_output_strex (emit, mode, "", t2, t1, memory);
23389 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23390 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23391 LOCAL_LABEL_PREFIX);
23395 /* Use old_value for the return value because for some operations
23396 the old_value can easily be restored. This saves one register. */
23397 arm_output_strex (emit, mode, "", old_value, t1, memory);
23398 operands[0] = old_value;
23399 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23400 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23401 LOCAL_LABEL_PREFIX);
23406 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23410 arm_output_op3 (emit, "add", old_value, t1, new_value);
23414 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23418 arm_output_op2 (emit, "mov", old_value, required_value);
23422 gcc_unreachable ();
23426 arm_process_output_memory_barrier (emit, NULL);
23427 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23431 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23434 default_value = operands[index - 1];
23436 return default_value;
23439 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23440 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23442 /* Extract the operands for a synchroniztion instruction from the
23443 instructions attributes and emit the instruction. */
23445 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23447 rtx result, memory, required_value, new_value, t1, t2;
23449 enum machine_mode mode;
23450 enum attr_sync_op sync_op;
23452 result = FETCH_SYNC_OPERAND(result, 0);
23453 memory = FETCH_SYNC_OPERAND(memory, 0);
23454 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23455 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23456 t1 = FETCH_SYNC_OPERAND(t1, 0);
23457 t2 = FETCH_SYNC_OPERAND(t2, 0);
23459 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23460 sync_op = get_attr_sync_op (insn);
23461 mode = GET_MODE (memory);
23463 arm_output_sync_loop (emit, mode, result, memory, required_value,
23464 new_value, t1, t2, sync_op, early_barrier);
23467 /* Emit a synchronization instruction loop. */
23469 arm_output_sync_insn (rtx insn, rtx *operands)
23471 arm_process_output_sync_insn (arm_emit, insn, operands);
23475 /* Count the number of machine instruction that will be emitted for a
23476 synchronization instruction. Note that the emitter used does not
23477 emit instructions, it just counts instructions being carefull not
23478 to count labels. */
23480 arm_sync_loop_insns (rtx insn, rtx *operands)
23482 arm_insn_count = 0;
23483 arm_process_output_sync_insn (arm_count, insn, operands);
23484 return arm_insn_count;
23487 /* Helper to call a target sync instruction generator, dealing with
23488 the variation in operands required by the different generators. */
23490 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23491 rtx memory, rtx required_value, rtx new_value)
23493 switch (generator->op)
23495 case arm_sync_generator_omn:
23496 gcc_assert (! required_value);
23497 return generator->u.omn (old_value, memory, new_value);
23499 case arm_sync_generator_omrn:
23500 gcc_assert (required_value);
23501 return generator->u.omrn (old_value, memory, required_value, new_value);
23507 /* Expand a synchronization loop. The synchronization loop is expanded
23508 as an opaque block of instructions in order to ensure that we do
23509 not subsequently get extraneous memory accesses inserted within the
23510 critical region. The exclusive access property of ldrex/strex is
23511 only guaranteed in there are no intervening memory accesses. */
23513 arm_expand_sync (enum machine_mode mode,
23514 struct arm_sync_generator *generator,
23515 rtx target, rtx memory, rtx required_value, rtx new_value)
23517 if (target == NULL)
23518 target = gen_reg_rtx (mode);
23520 memory = arm_legitimize_sync_memory (memory);
23521 if (mode != SImode)
23523 rtx load_temp = gen_reg_rtx (SImode);
23525 if (required_value)
23526 required_value = convert_modes (SImode, mode, required_value, true);
23528 new_value = convert_modes (SImode, mode, new_value, true);
23529 emit_insn (arm_call_generator (generator, load_temp, memory,
23530 required_value, new_value));
23531 emit_move_insn (target, gen_lowpart (mode, load_temp));
23535 emit_insn (arm_call_generator (generator, target, memory, required_value,
23540 static unsigned int
23541 arm_autovectorize_vector_sizes (void)
23543 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
23547 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23549 /* Vectors which aren't in packed structures will not be less aligned than
23550 the natural alignment of their element type, so this is safe. */
23551 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23554 return default_builtin_vector_alignment_reachable (type, is_packed);
23558 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23559 const_tree type, int misalignment,
23562 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23564 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23569 /* If the misalignment is unknown, we should be able to handle the access
23570 so long as it is not to a member of a packed data structure. */
23571 if (misalignment == -1)
23574 /* Return true if the misalignment is a multiple of the natural alignment
23575 of the vector's element type. This is probably always going to be
23576 true in practice, since we've already established that this isn't a
23578 return ((misalignment % align) == 0);
23581 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23586 arm_conditional_register_usage (void)
23590 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23592 for (regno = FIRST_FPA_REGNUM;
23593 regno <= LAST_FPA_REGNUM; ++regno)
23594 fixed_regs[regno] = call_used_regs[regno] = 1;
23597 if (TARGET_THUMB1 && optimize_size)
23599 /* When optimizing for size on Thumb-1, it's better not
23600 to use the HI regs, because of the overhead of
23602 for (regno = FIRST_HI_REGNUM;
23603 regno <= LAST_HI_REGNUM; ++regno)
23604 fixed_regs[regno] = call_used_regs[regno] = 1;
23607 /* The link register can be clobbered by any branch insn,
23608 but we have no way to track that at present, so mark
23609 it as unavailable. */
23611 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23613 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23615 if (TARGET_MAVERICK)
23617 for (regno = FIRST_FPA_REGNUM;
23618 regno <= LAST_FPA_REGNUM; ++ regno)
23619 fixed_regs[regno] = call_used_regs[regno] = 1;
23620 for (regno = FIRST_CIRRUS_FP_REGNUM;
23621 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23623 fixed_regs[regno] = 0;
23624 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23629 /* VFPv3 registers are disabled when earlier VFP
23630 versions are selected due to the definition of
23631 LAST_VFP_REGNUM. */
23632 for (regno = FIRST_VFP_REGNUM;
23633 regno <= LAST_VFP_REGNUM; ++ regno)
23635 fixed_regs[regno] = 0;
23636 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23637 || regno >= FIRST_VFP_REGNUM + 32;
23642 if (TARGET_REALLY_IWMMXT)
23644 regno = FIRST_IWMMXT_GR_REGNUM;
23645 /* The 2002/10/09 revision of the XScale ABI has wCG0
23646 and wCG1 as call-preserved registers. The 2002/11/21
23647 revision changed this so that all wCG registers are
23648 scratch registers. */
23649 for (regno = FIRST_IWMMXT_GR_REGNUM;
23650 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23651 fixed_regs[regno] = 0;
23652 /* The XScale ABI has wR0 - wR9 as scratch registers,
23653 the rest as call-preserved registers. */
23654 for (regno = FIRST_IWMMXT_REGNUM;
23655 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23657 fixed_regs[regno] = 0;
23658 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23662 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23664 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23665 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23667 else if (TARGET_APCS_STACK)
23669 fixed_regs[10] = 1;
23670 call_used_regs[10] = 1;
23672 /* -mcaller-super-interworking reserves r11 for calls to
23673 _interwork_r11_call_via_rN(). Making the register global
23674 is an easy way of ensuring that it remains valid for all
23676 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23677 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23679 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23680 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23681 if (TARGET_CALLER_INTERWORKING)
23682 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23684 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23688 arm_preferred_rename_class (reg_class_t rclass)
23690 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23691 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
23692 and code size can be reduced. */
23693 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23699 /* Compute the atrribute "length" of insn "*push_multi".
23700 So this function MUST be kept in sync with that insn pattern. */
23702 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
23704 int i, regno, hi_reg;
23705 int num_saves = XVECLEN (parallel_op, 0);
23712 regno = REGNO (first_op);
23713 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23714 for (i = 1; i < num_saves && !hi_reg; i++)
23716 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
23717 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23725 #include "gt-arm.h"