/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
static bool arm_legitimate_constant_p (enum machine_mode, rtx);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int *, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree arm_builtin_decl (unsigned, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool arm_array_mode_supported_p (enum machine_mode,
                                        unsigned HOST_WIDE_INT);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL, false },
  { "dllexport",    0, 0, true,  false, false, NULL, false },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute,
    false },
#endif
  { NULL,           0, 0, false, false, false, NULL, false }
};
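
/* As an illustrative example (not itself part of this table), a user
   requests the "isr"/"interrupt" handling declared above with:

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_handle_isr_attribute validates the optional argument so that the
   special prologue/epilogue sequences can be generated for HANDLER.  */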
/* Set default optimization options.  */
static const struct default_options arm_option_optimization_table[] =
  {
    /* Enable section anchors by default at -O1 or higher.  */
    { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */
#undef TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
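
/* Checking the arithmetic: the anchor range spans -4088 .. 4095, so a
   block covers 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023.  */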
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE         (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
                         | FL_CO_PROC)
#define FL_FOR_ARCH2       FL_NOTM
#define FL_FOR_ARCH3       (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M      (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4       (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T      (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5       (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T      (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E      (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE     (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ    FL_FOR_ARCH5TE
#define FL_FOR_ARCH6       (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J      FL_FOR_ARCH6
#define FL_FOR_ARCH6K      (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z      FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK     FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2     (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M      (FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7       ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A      (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R      (FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M      (FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM     (FL_FOR_ARCH7M | FL_ARCH7EM)
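
/* To see how these build on one another (worked example, derived from
   the definitions above): FL_FOR_ARCH5TE unfolds through FL_FOR_ARCH5E,
   FL_FOR_ARCH5, FL_FOR_ARCH4, FL_FOR_ARCH3M, FL_FOR_ARCH3 and
   FL_FOR_ARCH2 into
     FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
     | FL_THUMB.  */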
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2 = 0;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;
/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
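
/* For example, arm_condition_codes[0] is "eq".  The table is laid out in
   complementary pairs, so the inverse of a condition can be found by
   flipping the low bit of its index: "eq" <-> "ne", "cs" <-> "cc",
   "hi" <-> "ls", and so on.  */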
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
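
/* As a worked example (the register assignments are configuration
   dependent): with the Thumb hard frame pointer in r7 and the PIC
   register in r9, THUMB2_WORK_REGS evaluates to 0xff & ~0x80 = 0x7f,
   i.e. r0-r6; SP (r13), PC (r15) and r9 lie outside the low-register
   mask 0xff to begin with.  */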
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size
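
/* For example, ARM_PREFETCH_BENEFICIAL(4,32,32) expands to the three
   initializers "4, 32, 32", filling the prefetch-related fields of a
   tune_params initializer in one go (see arm_cortex_a9_tune below),
   while ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  */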
const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_v6t2_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_cortex_a5_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  false,                                        /* Prefer constant pool.  */
  arm_cortex_a5_branch_cost
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  false,                                        /* Prefer constant pool.  */
  arm_default_branch_cost
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,                                            /* Constant limit.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  true,                                         /* Prefer constant pool.  */
  arm_default_branch_cost
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */
#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "arm-arches.def"
#undef ARM_ARCH
  {NULL, arm_none, NULL, 0, NULL}
};
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
  { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
#include "arm-fpus.def"
#undef ARM_FPU
};
/* Supported TLS relocations.  */
/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
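
/* For instance, when optimizing for size the limit is always 1, whereas
   a core tuned with arm_slowmul_tune above may spend up to 3 insns
   synthesizing a single constant.  */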
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
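
/* Typical use: emit_set_insn (reg, GEN_INT (0)) emits the single-set
   insn "reg := 0" without spelling out gen_rtx_SET and emit_insn at
   every call site.  */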
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
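
/* Example: bit_count (0x29) examines 0b101001 and returns 3; the loop
   clears one set bit per iteration, 0x29 -> 0x28 -> 0x20 -> 0
   (Kernighan's method).  */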
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
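
  /* Per the run-time ABI, __aeabi_idivmod returns the quotient in r0 and
     the remainder in r1, which is why the same entry points can back both
     the plain division optabs above and the divmod expansions.  */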
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_f2h_ieee"
                         : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
                        (arm_fp16_format == ARM_FP16_FORMAT_IEEE
                         ? "__gnu_h2f_ieee"
                         : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
         void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
                         FIELD_DECL,
                         get_identifier ("__ap"),
                         ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
                       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                          gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  if (global_options_set.x_arm_arch_option)
    arm_selected_arch = &all_architectures[arm_arch_option];

  if (global_options_set.x_arm_cpu_option)
    arm_selected_cpu = &all_cores[(int) arm_cpu_option];

  if (global_options_set.x_arm_tune_option)
    arm_selected_tune = &all_cores[(int) arm_tune_option];

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
        {
          /* Check for conflict between mcpu and march.  */
          if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       arm_selected_cpu->name, arm_selected_arch->name);
              /* -march wins for code generation.
                 -mcpu wins for default tuning.  */
              if (!arm_selected_tune)
                arm_selected_tune = arm_selected_cpu;

              arm_selected_cpu = arm_selected_arch;
            }
          else
            /* -mcpu wins.  */
            arm_selected_arch = NULL;
        }
      else
        /* Pick a CPU based on the architecture.  */
        arm_selected_cpu = arm_selected_arch;
    }
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
          /* Default to ARM6.  */
          if (!arm_selected_cpu->name)
            arm_selected_cpu = &all_cores[arm6];
        }

      sel = arm_selected_cpu;
      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switches that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }
      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          arm_selected_cpu = sel;
        }
    }

  gcc_assert (arm_selected_cpu);
  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!arm_selected_tune)
    arm_selected_tune = &all_cores[arm_selected_cpu->core];

  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
  insn_flags = arm_selected_cpu->flags;

  arm_tune = arm_selected_tune->core;
  tune_flags = arm_selected_tune->flags;
  current_tune = arm_selected_tune->tune;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");
  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking");
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used");  */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;
  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
  arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
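
  /* Checking the arithmetic above: 248 + 1 + 4095 = 4344 = 8 * 543 bytes
     per Thumb-2 anchor block, and the Thumb-1 maximum of 127 corresponds
     to the 7-bit byte-offset range (word loads encode a 5-bit immediate
     scaled by 4, reaching offsets 0..124).  */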
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;
  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");
  if (!global_options_set.x_arm_fpu_index)
    {
      const char *target_fpu_name;
      bool ok;

#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif

      ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
                                  CL_TARGET);
      gcc_assert (ok);
    }

  arm_fpu_desc = &all_fpus[arm_fpu_index];
  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }
  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");

      if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }
  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");
  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || (TARGET_FPA && arm_fpu_desc->rev))
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
1657 /* Override the default structure alignment for AAPCS ABI. */
1658   if (!global_options_set.x_arm_structure_size_boundary)
1659     {
1660       if (TARGET_AAPCS_BASED)
1661         arm_structure_size_boundary = 8;
1662     }
1663   else
1664     {
1665       if (arm_structure_size_boundary != 8
1666           && arm_structure_size_boundary != 32
1667           && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1668         {
1669           if (ARM_DOUBLEWORD_ALIGN)
1670             warning (0,
1671                      "structure size boundary can only be set to 8, 32 or 64");
1672           else
1673             warning (0, "structure size boundary can only be set to 8 or 32");
1674           arm_structure_size_boundary
1675             = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1676         }
1677     }
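/* Editorial note: a small illustration of what this boundary changes (a
   sketch, not compiler code).  Given

        struct example { char c; };

   sizeof (struct example) is 4 with -mstructure-size-boundary=32 (the
   legacy default rounds structure sizes up to a full word), but 1 with
   the AAPCS value of 8.  */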
1679 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1681 error ("RTP PIC is incompatible with Thumb");
1685 /* If stack checking is disabled, we can use r10 as the PIC register,
1686 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1687   if (flag_pic && TARGET_SINGLE_PIC_BASE)
1688     {
1689       if (TARGET_VXWORKS_RTP)
1690         warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1691       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1692     }
1694 if (flag_pic && TARGET_VXWORKS_RTP)
1695 arm_pic_register = 9;
1697   if (arm_pic_register_string != NULL)
1698     {
1699       int pic_register = decode_reg_name (arm_pic_register_string);
1701       if (!flag_pic)
1702         warning (0, "-mpic-register= is useless without -fpic");
1704       /* Prevent the user from choosing an obviously stupid PIC register.  */
1705       else if (pic_register < 0 || call_used_regs[pic_register]
1706                || pic_register == HARD_FRAME_POINTER_REGNUM
1707                || pic_register == STACK_POINTER_REGNUM
1708                || pic_register >= PC_REGNUM
1709                || (TARGET_VXWORKS_RTP
1710                    && (unsigned int) pic_register != arm_pic_register))
1711         error ("unable to use '%s' for PIC register", arm_pic_register_string);
1712       else
1713         arm_pic_register = pic_register;
1714     }
1716 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1717   if (fix_cm3_ldrd == 2)
1718     {
1719       if (arm_selected_cpu->core == cortexm3)
1720         fix_cm3_ldrd = 1;
1721       else
1722         fix_cm3_ldrd = 0;
1723     }
1725 if (TARGET_THUMB1 && flag_schedule_insns)
1727 /* Don't warn since it's on by default in -O2. */
1728 flag_schedule_insns = 0;
1731   if (optimize_size)
1732     {
1733       /* If optimizing for size, bump the number of instructions that we
1734          are prepared to conditionally execute (even on a StrongARM).  */
1735       max_insns_skipped = 6;
1736     }
1737   else
1738     {
1739       /* StrongARM has early execution of branches, so a sequence
1740          that is worth skipping is shorter.  */
1741       if (arm_tune_strongarm)
1742         max_insns_skipped = 3;
1743     }
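/* Editorial note: an illustration of what max_insns_skipped controls
   (hedged; actual code generation depends on many factors).  A branch
   such as

        if (a > b) x = 0;

   can be compiled on ARM as conditionally executed instructions:

        cmp     r0, r1
        movgt   r2, #0

   max_insns_skipped bounds how long such a branch-free sequence may
   grow before a real branch is preferred.  */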
1745 /* Hot/Cold partitioning is not currently supported, since we can't
1746 handle literal pool placement in that case. */
1747   if (flag_reorder_blocks_and_partition)
1748     {
1749       inform (input_location,
1750               "-freorder-blocks-and-partition not supported on this architecture");
1751       flag_reorder_blocks_and_partition = 0;
1752       flag_reorder_blocks = 1;
1753     }
1756 /* Hoisting PIC address calculations more aggressively provides a small,
1757 but measurable, size reduction for PIC code. Therefore, we decrease
1758 the bar for unrestricted expression hoisting to the cost of PIC address
1759 calculation, which is 2 instructions. */
1760 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1761 global_options.x_param_values,
1762 global_options_set.x_param_values);
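/* Editorial note: the user-visible equivalent of the call above would be
   --param gcse-unrestricted-cost=2 on the command line (assuming the
   usual mapping of PARAM_GCSE_UNRESTRICTED_COST to that param name);
   maybe_set_param_value only adjusts the default and never overrides an
   explicit user setting.  */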
1764 /* ARM EABI defaults to strict volatile bitfields. */
1765 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1766 flag_strict_volatile_bitfields = 1;
1768   /* Enable software prefetching at -O3 for CPUs that have prefetch, where
1769      we have deemed it beneficial (signified by num_prefetch_slots >= 1).  */
1770   if (flag_prefetch_loop_arrays < 0
1771       && HAVE_prefetch
1772       && optimize >= 3
1773       && current_tune->num_prefetch_slots > 0)
1774     flag_prefetch_loop_arrays = 1;
1776 /* Set up parameters to be used in prefetching algorithm. Do not override the
1777 defaults unless we are tuning for a core we have researched values for. */
1778 if (current_tune->num_prefetch_slots > 0)
1779 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1780 current_tune->num_prefetch_slots,
1781 global_options.x_param_values,
1782 global_options_set.x_param_values);
1783 if (current_tune->l1_cache_line_size >= 0)
1784 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1785 current_tune->l1_cache_line_size,
1786 global_options.x_param_values,
1787 global_options_set.x_param_values);
1788 if (current_tune->l1_cache_size >= 0)
1789 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1790 current_tune->l1_cache_size,
1791 global_options.x_param_values,
1792 global_options_set.x_param_values);
1794 /* Register global variables with the garbage collector. */
1795 arm_add_gc_roots ();
1798 static void
1799 arm_add_gc_roots (void)
1800 {
1801   gcc_obstack_init (&minipool_obstack);
1802   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1803 }
1805 /* A table of known ARM exception types.
1806 For use with the interrupt function attribute. */
1808 typedef struct
1809 {
1810   const char *const arg;
1811   const unsigned long return_value;
1812 }
1813 isr_attribute_arg;
1815 static const isr_attribute_arg isr_attribute_args [] =
1816 {
1817 { "IRQ", ARM_FT_ISR },
1818 { "irq", ARM_FT_ISR },
1819 { "FIQ", ARM_FT_FIQ },
1820 { "fiq", ARM_FT_FIQ },
1821 { "ABORT", ARM_FT_ISR },
1822 { "abort", ARM_FT_ISR },
1823 { "ABORT", ARM_FT_ISR },
1824 { "abort", ARM_FT_ISR },
1825 { "UNDEF", ARM_FT_EXCEPTION },
1826 { "undef", ARM_FT_EXCEPTION },
1827 { "SWI", ARM_FT_EXCEPTION },
1828 { "swi", ARM_FT_EXCEPTION },
1829   { NULL,    ARM_FT_NORMAL }
1830 };
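/* Editorial note: the table above serves function declarations such as

        void handler (void) __attribute__ ((interrupt ("IRQ")));

   The string argument is matched exactly against the entries above,
   which is why both upper- and lower-case spellings are listed.  */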
1832 /* Returns the (interrupt) function type of the current
1833 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1835 static unsigned long
1836 arm_isr_value (tree argument)
1837 {
1838   const isr_attribute_arg * ptr;
1839   const char * arg;
1841   if (!arm_arch_notm)
1842     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1844   /* No argument - default to IRQ.  */
1845   if (argument == NULL_TREE)
1846     return ARM_FT_ISR;
1848 /* Get the value of the argument. */
1849 if (TREE_VALUE (argument) == NULL_TREE
1850 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1851 return ARM_FT_UNKNOWN;
1853 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1855 /* Check it against the list of known arguments. */
1856 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1857 if (streq (arg, ptr->arg))
1858 return ptr->return_value;
1860   /* An unrecognized interrupt type.  */
1861   return ARM_FT_UNKNOWN;
1862 }
1864 /* Computes the type of the current function. */
1866 static unsigned long
1867 arm_compute_func_type (void)
1868 {
1869   unsigned long type = ARM_FT_UNKNOWN;
1870   tree a;
1871   tree attr;
1873   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1875 /* Decide if the current function is volatile. Such functions
1876 never return, and many memory cycles can be saved by not storing
1877 register values that will never be needed again. This optimization
1878 was added to speed up context switching in a kernel application. */
1879   if (optimize > 0
1880       && (TREE_NOTHROW (current_function_decl)
1881           || !(flag_unwind_tables
1882                || (flag_exceptions
1883                    && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1884       && TREE_THIS_VOLATILE (current_function_decl))
1885 type |= ARM_FT_VOLATILE;
1887 if (cfun->static_chain_decl != NULL)
1888 type |= ARM_FT_NESTED;
1890 attr = DECL_ATTRIBUTES (current_function_decl);
1892   a = lookup_attribute ("naked", attr);
1893   if (a != NULL_TREE)
1894     type |= ARM_FT_NAKED;
1896   a = lookup_attribute ("isr", attr);
1897   if (a == NULL_TREE)
1898     a = lookup_attribute ("interrupt", attr);
1900   if (a == NULL_TREE)
1901     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1902   else
1903     type |= arm_isr_value (TREE_VALUE (a));
1905   return type;
1906 }
1908 /* Returns the type of the current function. */
1910 unsigned long
1911 arm_current_func_type (void)
1912 {
1913   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1914     cfun->machine->func_type = arm_compute_func_type ();
1916   return cfun->machine->func_type;
1917 }
1919 static bool
1920 arm_allocate_stack_slots_for_args (void)
1921 {
1922   /* Naked functions should not allocate stack slots for arguments.  */
1923   return !IS_NAKED (arm_current_func_type ());
1924 }
1927 /* Output assembler code for a block containing the constant parts
1928 of a trampoline, leaving space for the variable parts.
1930    On the ARM, (if r8 is the static chain regnum, and remembering that
1931    referencing pc adds an offset of 8) the trampoline looks like:
1932            ldr          sc, [pc, #0]    @ sc = static chain register
1933            ldr          pc, [pc]
1934            .word        static chain value
1935            .word        function's address
1936    XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
1938 static void
1939 arm_asm_trampoline_template (FILE *f)
1940 {
1941   if (TARGET_ARM)
1942     {
1943 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
1944       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
1945     }
1946   else if (TARGET_THUMB2)
1947     {
1948 /* The Thumb-2 trampoline is similar to the arm implementation.
1949 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
1950 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
1951 STATIC_CHAIN_REGNUM, PC_REGNUM);
1952       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
1953     }
1954   else
1955     {
1956       ASM_OUTPUT_ALIGN (f, 2);
1957 fprintf (f, "\t.code\t16\n");
1958 fprintf (f, ".Ltrampoline_start:\n");
1959 asm_fprintf (f, "\tpush\t{r0, r1}\n");
1960 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1961 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
1962 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1963 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
1964       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
1965     }
1966   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1967   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1968 }
1970 /* Emit RTL insns to initialize the variable parts of a trampoline. */
1972 static void
1973 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
1974 {
1975   rtx fnaddr, mem, a_tramp;
1977 emit_block_move (m_tramp, assemble_trampoline_template (),
1978 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
1980 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
1981 emit_move_insn (mem, chain_value);
1983 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
1984 fnaddr = XEXP (DECL_RTL (fndecl), 0);
1985 emit_move_insn (mem, fnaddr);
1987 a_tramp = XEXP (m_tramp, 0);
1988 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
1989 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
1990                      plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
1991 }
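/* Editorial note: for context, a trampoline is only materialized when
   the address of a nested function escapes, because the caller then
   needs a single code address that also carries the static chain.  A
   minimal (hypothetical) GNU C example:  */
#if 0
extern void run (int (*fn) (int));

void
outer (int x)
{
  int inner (int y) { return x + y; }  /* uses outer's frame via the chain */
  run (inner);  /* taking inner's address forces a trampoline */
}
#endif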
1993 /* Thumb trampolines should be entered in thumb mode, so set
1994 the bottom bit of the address. */
1996 static rtx
1997 arm_trampoline_adjust_address (rtx addr)
1998 {
1999   if (TARGET_THUMB)
2000     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2001                                 NULL, 0, OPTAB_LIB_WIDEN);
2002   return addr;
2003 }
2005 /* Return 1 if it is possible to return using a single instruction.
2006 If SIBLING is non-null, this is a test for a return before a sibling
2007 call. SIBLING is the call insn, so we can examine its register usage. */
2009 int
2010 use_return_insn (int iscond, rtx sibling)
2011 {
2012   int regno;
2013   unsigned int func_type;
2014 unsigned long saved_int_regs;
2015 unsigned HOST_WIDE_INT stack_adjust;
2016 arm_stack_offsets *offsets;
2018 /* Never use a return instruction before reload has run. */
2019   if (!reload_completed)
2020     return 0;
2022 func_type = arm_current_func_type ();
2024   /* Naked, volatile and stack alignment functions need special
2025      consideration.  */
2026   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2027     return 0;
2029   /* So do interrupt functions that use the frame pointer and Thumb
2030      interrupt functions.  */
2031   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2032     return 0;
2034 offsets = arm_get_frame_offsets ();
2035 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2037 /* As do variadic functions. */
2038 if (crtl->args.pretend_args_size
2039 || cfun->machine->uses_anonymous_args
2040 /* Or if the function calls __builtin_eh_return () */
2041 || crtl->calls_eh_return
2042 /* Or if the function calls alloca */
2043 || cfun->calls_alloca
2044 /* Or if there is a stack adjustment. However, if the stack pointer
2045 is saved on the stack, we can use a pre-incrementing stack load. */
2046 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2047                                  && stack_adjust == 4)))
2048     return 0;
2050 saved_int_regs = offsets->saved_regs_mask;
2052 /* Unfortunately, the insn
2054 ldmib sp, {..., sp, ...}
2056 triggers a bug on most SA-110 based devices, such that the stack
2057 pointer won't be correctly restored if the instruction takes a
2058 page fault. We work around this problem by popping r3 along with
2059 the other registers, since that is never slower than executing
2060 another instruction.
2062 We test for !arm_arch5 here, because code for any architecture
2063      less than this could potentially be run on one of the buggy
2064      chips.  */
2065   if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2066     {
2067       /* Validate that r3 is a call-clobbered register (always true in
2068          the default ABI) ... */
2069       if (!call_used_regs[3])
2070         return 0;
2072 /* ... that it isn't being used for a return value ... */
2073       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2074         return 0;
2076 /* ... or for a tail-call argument ... */
2078       if (sibling)
2079         {
2080           gcc_assert (GET_CODE (sibling) == CALL_INSN);
2081           if (find_regno_fusage (sibling, USE, 3))
2082             return 0;
2083         }
2085 /* ... and that there are no call-saved registers in r0-r2
2086 (always true in the default ABI). */
2087       if (saved_int_regs & 0x7)
2088         return 0;
2089     }
2091   /* Can't be done if interworking with Thumb, and any registers have been
2092      stacked.  */
2093   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2094     return 0;
2096 /* On StrongARM, conditional returns are expensive if they aren't
2097 taken and multiple registers have been stacked. */
2098   if (iscond && arm_tune_strongarm)
2099     {
2100       /* Conditional return when just the LR is stored is a simple
2101          conditional-load instruction, that's not expensive.  */
2102       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2103         return 0;
2104     }
2105   if (flag_pic
2106       && arm_pic_register != INVALID_REGNUM
2107       && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2108     return 0;
2111 /* If there are saved registers but the LR isn't saved, then we need
2112 two instructions for the return. */
2113   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2114     return 0;
2116 /* Can't be done if any of the FPA regs are pushed,
2117 since this also requires an insn. */
2118 if (TARGET_HARD_FLOAT && TARGET_FPA)
2119 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2120       if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2121         return 0;
2123 /* Likewise VFP regs. */
2124 if (TARGET_HARD_FLOAT && TARGET_VFP)
2125 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2126       if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2127         return 0;
2129 if (TARGET_REALLY_IWMMXT)
2130     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2131       if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2132         return 0;
2134   return 1;
2135 }
2137 /* Return TRUE if int I is a valid immediate ARM constant. */
2139 int
2140 const_ok_for_arm (HOST_WIDE_INT i)
2141 {
2142   int lowbit;
2144   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2145      be all zero, or all one.  */
2146   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2147       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2148           != ((~(unsigned HOST_WIDE_INT) 0)
2149               & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2150     return FALSE;
2152   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2154 /* Fast return for 0 and small values. We must do this for zero, since
2155 the code below can't handle that one case. */
2156   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2157     return TRUE;
2159   /* Get the number of trailing zeros.  */
2160   lowbit = ffs((int) i) - 1;
2162   /* Only even shifts are allowed in ARM mode so round down to the
2163      nearest even number.  */
2164   if (TARGET_ARM)
2165     lowbit &= ~1;
2167   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2168     return TRUE;
2170   if (TARGET_ARM)
2171     {
2172       /* Allow rotated constants in ARM mode.  */
2173       if (lowbit <= 4
2174           && ((i & ~0xc000003f) == 0
2175               || (i & ~0xf000000f) == 0
2176               || (i & ~0xfc000003) == 0))
2177         return TRUE;
2178     }
2179   else
2180     {
2181       HOST_WIDE_INT v;
2183       /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
2184       v = i & 0xff;
2185       v |= v << 16;
2186       if (i == v || i == (v | (v << 8)))
2187         return TRUE;
2189       /* Allow repeated pattern 0xXY00XY00.  */
2190       v = i & 0xff00;
2191       v |= v << 16;
2192       if (i == v)
2193         return TRUE;
2194     }
2196   return FALSE;
2197 }
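/* Editorial note: the ARM-mode rule above can be restated in isolation
   (a standalone sketch, not used by the compiler): a value is a valid
   immediate iff it is an 8-bit constant rotated right by an even
   amount; equivalently, some even left-rotation of the value must leave
   at most 8 significant bits.  */
#if 0
static int
example_is_arm_immediate (unsigned int v)
{
  int r;

  for (r = 0; r < 32; r += 2)
    {
      /* Rotate V left by R bits; if the result fits in 8 bits, then V
         is that 8-bit value rotated right by R.  */
      unsigned int x = (v << r) | (r ? v >> (32 - r) : 0);
      if ((x & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
#endif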
2199 /* Return true if I is a valid constant for the operation CODE. */
2200 static int
2201 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2202 {
2203   if (const_ok_for_arm (i))
2204     return 1;
2206   switch (code)
2207     {
2208     case SET:
2209       /* See if we can use movw.  */
2210       if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2211         return 1;
2212       else
2213         return 0;
2234 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2236     case MINUS:         /* Should only occur with (MINUS I reg) => rsb */
2237     case XOR:
2238       return 0;
2240     case IOR:
2241       if (TARGET_THUMB2)
2242         return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2243       return 0;
2245     case AND:
2246       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2248     default:
2249       gcc_unreachable ();
2250     }
2251 }
2253 /* Emit a sequence of insns to handle a large constant.
2254 CODE is the code of the operation required, it can be any of SET, PLUS,
2255 IOR, AND, XOR, MINUS;
2256 MODE is the mode in which the operation is being performed;
2257 VAL is the integer to operate on;
2258 SOURCE is the other operand (a register, or a null-pointer for SET);
2259 SUBTARGETS means it is safe to create scratch registers if that will
2260    either produce a simpler sequence, or we will want to CSE the values.
2261 Return value is the number of insns emitted. */
2263 /* ??? Tweak this for thumb2. */
2264 int
2265 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2266                     HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2267 {
2268   rtx cond;
2270   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2271     cond = COND_EXEC_TEST (PATTERN (insn));
2272   else
2273     cond = NULL_RTX;
2275 if (subtargets || code == SET
2276 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2277 && REGNO (target) != REGNO (source)))
2279 /* After arm_reorg has been called, we can't fix up expensive
2280 constants by pushing them into memory so we must synthesize
2281 them in-line, regardless of the cost. This is only likely to
2282 be more costly on chips that have load delay slots and we are
2283 compiling without running the scheduler (so no splitting
2284 occurred before the final instruction emission).
2286 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2288       if (!after_arm_reorg
2289           && !cond
2290           && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2291                                 1, 0)
2292               > (arm_constant_limit (optimize_function_for_size_p (cfun))
2293                  + (code != SET))))
2294         {
2295           if (code == SET)
2296             {
2297               /* Currently SET is the only monadic value for CODE, all
2298                  the rest are dyadic.  */
2299               if (TARGET_USE_MOVT)
2300                 arm_emit_movpair (target, GEN_INT (val));
2301               else
2302                 emit_set_insn (target, GEN_INT (val));
2303               return 1;
2304             }
2306           else
2307             {
2308               rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2310               if (TARGET_USE_MOVT)
2311                 arm_emit_movpair (temp, GEN_INT (val));
2312               else
2313                 emit_set_insn (temp, GEN_INT (val));
2315               /* For MINUS, the value is subtracted from, since we never
2316                  have subtraction of a constant.  */
2317               if (code == MINUS)
2318                 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2319               else
2320                 emit_set_insn (target,
2321                                gen_rtx_fmt_ee (code, mode, source, temp));
2322               return 2;
2323             }
2324         }
2325     }
2327   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2328                            1);
2329 }
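/* Editorial note, a worked example: 0x00010001 is not a valid immediate
   (its two set bits span more than 8 bits under any even rotation), but
   each half is, so for code == SET the routines here can emit

        mov     rD, #65536      @ 0x00010000
        orr     rD, rD, #1      @ 0x00010001

   i.e. two data-processing insns instead of a literal-pool load.  */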
2331 /* Return the number of instructions required to synthesize the given
2332 constant, if we start emitting them from bit-position I. */
2333 static int
2334 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2335 {
2336   HOST_WIDE_INT temp1;
2337   int step_size = TARGET_ARM ? 2 : 1;
2338   int num_insns = 0;
2340   gcc_assert (TARGET_ARM || i == 0);
2342   do
2343     {
2344       int end;
2346       if (i <= 0)
2347         i += 32;
2348       if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2349         {
2350           end = i - 8;
2351           if (end < 0)
2352             end += 32;
2353           temp1 = remainder & ((0x0ff << end)
2354                                | ((i < end) ? (0xff >> (32 - end)) : 0));
2355           remainder &= ~temp1;
2356           num_insns++;
2357           i -= 8 - step_size;
2358         }
2359       i -= step_size;
2360     } while (remainder);
2361   return num_insns;
2362 }
2364 static int
2365 find_best_start (unsigned HOST_WIDE_INT remainder)
2366 {
2367   int best_consecutive_zeros = 0;
2368   int i;
2369   int best_start = 0;
2371   /* If we aren't targeting ARM, the best place to start is always at
2372      the bottom.  */
2373   if (!TARGET_ARM)
2374     return 0;
2376   for (i = 0; i < 32; i += 2)
2378 int consecutive_zeros = 0;
2380       if (!(remainder & (3 << i)))
2381         {
2382           while ((i < 32) && !(remainder & (3 << i)))
2383             {
2384               consecutive_zeros += 2;
2385               i += 2;
2386             }
2387           if (consecutive_zeros > best_consecutive_zeros)
2388             {
2389               best_consecutive_zeros = consecutive_zeros;
2390               best_start = i - consecutive_zeros;
2391             }
2392           i -= 2;
2393         }
2394     }
2396 /* So long as it won't require any more insns to do so, it's
2397 desirable to emit a small constant (in bits 0...9) in the last
2398 insn. This way there is more chance that it can be combined with
2399 a later addressing insn to form a pre-indexed load or store
2400 operation. Consider:
2402 *((volatile int *)0xe0000100) = 1;
2403 *((volatile int *)0xe0000110) = 2;
2405 We want this to wind up as:
2409 str rB, [rA, #0x100]
2411 str rB, [rA, #0x110]
2413 rather than having to synthesize both large constants from scratch.
2415 Therefore, we calculate how many insns would be required to emit
2416 the constant starting from `best_start', and also starting from
2417 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2418 yield a shorter sequence, we may as well use zero. */
2419   if (best_start != 0
2420       && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2421       && (count_insns_for_constant (remainder, 0) <=
2422           count_insns_for_constant (remainder, best_start)))
2423     best_start = 0;
2425   return best_start;
2426 }
2428 /* Emit an instruction with the indicated PATTERN.  If COND is
2429    non-NULL, conditionalize the execution of the instruction on COND
2430    being true.  */
2432 static void
2433 emit_constant_insn (rtx cond, rtx pattern)
2434 {
2435   if (cond)
2436     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2437   emit_insn (pattern);
2438 }
2440 /* As above, but extra parameter GENERATE which, if clear, suppresses
2441    RTL generation.  */
2442 /* ??? This needs more work for thumb2.  */
2444 static int
2445 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2446                   HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2447                   int generate)
2448 {
2449   int can_invert = 0;
2450   int can_negate = 0;
2451   int final_invert = 0;
2453 int num_bits_set = 0;
2454 int set_sign_bit_copies = 0;
2455 int clear_sign_bit_copies = 0;
2456 int clear_zero_bit_copies = 0;
2457 int set_zero_bit_copies = 0;
2459 unsigned HOST_WIDE_INT temp1, temp2;
2460 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2461 int step_size = TARGET_ARM ? 2 : 1;
2463 /* Find out which operations are safe for a given CODE. Also do a quick
2464    check for degenerate cases; these can occur when DImode operations
2465    are split.  */
2478 if (remainder == 0xffffffff)
2481 emit_constant_insn (cond,
2482 gen_rtx_SET (VOIDmode, target,
2483 GEN_INT (ARM_SIGN_EXTEND (val))));
2489 if (reload_completed && rtx_equal_p (target, source))
2493 emit_constant_insn (cond,
2494 gen_rtx_SET (VOIDmode, target, source));
2503 emit_constant_insn (cond,
2504 gen_rtx_SET (VOIDmode, target, const0_rtx));
2507 if (remainder == 0xffffffff)
2509 if (reload_completed && rtx_equal_p (target, source))
2512 emit_constant_insn (cond,
2513 gen_rtx_SET (VOIDmode, target, source));
2522 if (reload_completed && rtx_equal_p (target, source))
2525 emit_constant_insn (cond,
2526 gen_rtx_SET (VOIDmode, target, source));
2530 if (remainder == 0xffffffff)
2533 emit_constant_insn (cond,
2534 gen_rtx_SET (VOIDmode, target,
2535 gen_rtx_NOT (mode, source)));
2541 /* We treat MINUS as (val - source), since (source - val) is always
2542 passed as (source + (-val)). */
2546 emit_constant_insn (cond,
2547 gen_rtx_SET (VOIDmode, target,
2548 gen_rtx_NEG (mode, source)));
2551 if (const_ok_for_arm (val))
2554 emit_constant_insn (cond,
2555 gen_rtx_SET (VOIDmode, target,
2556 gen_rtx_MINUS (mode, GEN_INT (val),
2568 /* If we can do it in one insn get out quickly. */
2569 if (const_ok_for_op (val, code))
2572 emit_constant_insn (cond,
2573 gen_rtx_SET (VOIDmode, target,
2575 ? gen_rtx_fmt_ee (code, mode, source,
2581 /* Calculate a few attributes that may be useful for specific
2583 /* Count number of leading zeros. */
2584   for (i = 31; i >= 0; i--)
2585     {
2586       if ((remainder & (1 << i)) == 0)
2587         clear_sign_bit_copies++;
2588       else
2589         break;
2590     }
2592   /* Count number of leading ones.  */
2593   for (i = 31; i >= 0; i--)
2594     {
2595       if ((remainder & (1 << i)) != 0)
2596         set_sign_bit_copies++;
2597       else
2598         break;
2599     }
2601   /* Count number of trailing zeros.  */
2602   for (i = 0; i <= 31; i++)
2603     {
2604       if ((remainder & (1 << i)) == 0)
2605         clear_zero_bit_copies++;
2606       else
2607         break;
2608     }
2610   /* Count number of trailing ones.  */
2611   for (i = 0; i <= 31; i++)
2612     {
2613       if ((remainder & (1 << i)) != 0)
2614         set_zero_bit_copies++;
2615       else
2616         break;
2617     }
2622       /* See if we can do this by sign-extending a constant that is known
2623          to be negative.  This is a good way of doing it, since the shift
2624          may well merge into a subsequent insn.  */
2625 if (set_sign_bit_copies > 1)
2627 if (const_ok_for_arm
2628 (temp1 = ARM_SIGN_EXTEND (remainder
2629 << (set_sign_bit_copies - 1))))
2633 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2634 emit_constant_insn (cond,
2635 gen_rtx_SET (VOIDmode, new_src,
2637 emit_constant_insn (cond,
2638 gen_ashrsi3 (target, new_src,
2639 GEN_INT (set_sign_bit_copies - 1)));
2643 /* For an inverted constant, we will need to set the low bits,
2644 these will be shifted out of harm's way. */
2645 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2646 if (const_ok_for_arm (~temp1))
2650 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2651 emit_constant_insn (cond,
2652 gen_rtx_SET (VOIDmode, new_src,
2654 emit_constant_insn (cond,
2655 gen_ashrsi3 (target, new_src,
2656 GEN_INT (set_sign_bit_copies - 1)));
2662 /* See if we can calculate the value as the difference between two
2663 valid immediates. */
2664 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2666 int topshift = clear_sign_bit_copies & ~1;
2668 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2669 & (0xff000000 >> topshift));
2671 /* If temp1 is zero, then that means the 9 most significant
2672 bits of remainder were 1 and we've caused it to overflow.
2673 When topshift is 0 we don't need to do anything since we
2674 can borrow from 'bit 32'. */
2675 if (temp1 == 0 && topshift != 0)
2676 temp1 = 0x80000000 >> (topshift - 1);
2678 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2680 if (const_ok_for_arm (temp2))
2684 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2685 emit_constant_insn (cond,
2686 gen_rtx_SET (VOIDmode, new_src,
2688 emit_constant_insn (cond,
2689 gen_addsi3 (target, new_src,
2697 /* See if we can generate this by setting the bottom (or the top)
2698 16 bits, and then shifting these into the other half of the
2699 word. We only look for the simplest cases, to do more would cost
2700 too much. Be careful, however, not to generate this when the
2701 alternative would take fewer insns. */
2702 if (val & 0xffff0000)
2704 temp1 = remainder & 0xffff0000;
2705 temp2 = remainder & 0x0000ffff;
2707 /* Overlaps outside this range are best done using other methods. */
2708 for (i = 9; i < 24; i++)
2710 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2711 && !const_ok_for_arm (temp2))
2713 rtx new_src = (subtargets
2714 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2716 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2717 source, subtargets, generate);
2725 gen_rtx_ASHIFT (mode, source,
2732 /* Don't duplicate cases already considered. */
2733 for (i = 17; i < 24; i++)
2735 if (((temp1 | (temp1 >> i)) == remainder)
2736 && !const_ok_for_arm (temp1))
2738 rtx new_src = (subtargets
2739 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2741 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2742 source, subtargets, generate);
2747 gen_rtx_SET (VOIDmode, target,
2750 gen_rtx_LSHIFTRT (mode, source,
2761 /* If we have IOR or XOR, and the constant can be loaded in a
2762 single instruction, and we can find a temporary to put it in,
2763 then this can be done in two instructions instead of 3-4. */
2765 /* TARGET can't be NULL if SUBTARGETS is 0 */
2766 || (reload_completed && !reg_mentioned_p (target, source)))
2768 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2772 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2774 emit_constant_insn (cond,
2775 gen_rtx_SET (VOIDmode, sub,
2777 emit_constant_insn (cond,
2778 gen_rtx_SET (VOIDmode, target,
2779 gen_rtx_fmt_ee (code, mode,
2790          x = y | constant (which is composed of set_sign_bit_copies leading
2791          1s and the remainder 0s, e.g. 0xfff00000)
2792          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2794          This can be done in 2 instructions by using shifts with mov or mvn.
2795          E.g. for
2796          x = x | 0xfff00000;
2797          we generate:
2798          mvn  r0, r0, asl #12
2799          mvn  r0, r0, lsr #12  */
2800 if (set_sign_bit_copies > 8
2801 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2805 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2806 rtx shift = GEN_INT (set_sign_bit_copies);
2810 gen_rtx_SET (VOIDmode, sub,
2812 gen_rtx_ASHIFT (mode,
2817 gen_rtx_SET (VOIDmode, target,
2819 gen_rtx_LSHIFTRT (mode, sub,
2826 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2828 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2830          E.g. for r0 = r0 | 0xfff:
2831          mvn  r0, r0, lsr #12
2832          mvn  r0, r0, asl #12  */
2835 if (set_zero_bit_copies > 8
2836 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2840 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2841 rtx shift = GEN_INT (set_zero_bit_copies);
2845 gen_rtx_SET (VOIDmode, sub,
2847 gen_rtx_LSHIFTRT (mode,
2852 gen_rtx_SET (VOIDmode, target,
2854 gen_rtx_ASHIFT (mode, sub,
2860 /* This will never be reached for Thumb2 because orn is a valid
2861          instruction.  This is for Thumb-1 and the ARM 32-bit cases.
2863 x = y | constant (such that ~constant is a valid constant)
2865 x = ~(~y & ~constant).
2867 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2871 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2872 emit_constant_insn (cond,
2873 gen_rtx_SET (VOIDmode, sub,
2874 gen_rtx_NOT (mode, source)));
2877 sub = gen_reg_rtx (mode);
2878 emit_constant_insn (cond,
2879 gen_rtx_SET (VOIDmode, sub,
2880 gen_rtx_AND (mode, source,
2882 emit_constant_insn (cond,
2883 gen_rtx_SET (VOIDmode, target,
2884 gen_rtx_NOT (mode, sub)));
2891 /* See if two shifts will do 2 or more insn's worth of work. */
2892 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2894 HOST_WIDE_INT shift_mask = ((0xffffffff
2895 << (32 - clear_sign_bit_copies))
2898 if ((remainder | shift_mask) != 0xffffffff)
2902 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2903 insns = arm_gen_constant (AND, mode, cond,
2904 remainder | shift_mask,
2905 new_src, source, subtargets, 1);
2910 rtx targ = subtargets ? NULL_RTX : target;
2911 insns = arm_gen_constant (AND, mode, cond,
2912 remainder | shift_mask,
2913 targ, source, subtargets, 0);
2919 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2920 rtx shift = GEN_INT (clear_sign_bit_copies);
2922 emit_insn (gen_ashlsi3 (new_src, source, shift));
2923 emit_insn (gen_lshrsi3 (target, new_src, shift));
2929 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2931 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2933 if ((remainder | shift_mask) != 0xffffffff)
2937 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2939 insns = arm_gen_constant (AND, mode, cond,
2940 remainder | shift_mask,
2941 new_src, source, subtargets, 1);
2946 rtx targ = subtargets ? NULL_RTX : target;
2948 insns = arm_gen_constant (AND, mode, cond,
2949 remainder | shift_mask,
2950 targ, source, subtargets, 0);
2956 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2957 rtx shift = GEN_INT (clear_zero_bit_copies);
2959 emit_insn (gen_lshrsi3 (new_src, source, shift));
2960 emit_insn (gen_ashlsi3 (target, new_src, shift));
2972 for (i = 0; i < 32; i++)
2973     if (remainder & (1 << i))
2974       num_bits_set++;
2976 if ((code == AND) || (can_invert && num_bits_set > 16))
2977 remainder ^= 0xffffffff;
2978 else if (code == PLUS && num_bits_set > 16)
2979 remainder = (-remainder) & 0xffffffff;
2981 /* For XOR, if more than half the bits are set and there's a sequence
2982 of more than 8 consecutive ones in the pattern then we can XOR by the
2983 inverted constant and then invert the final result; this may save an
2984 instruction and might also lead to the final mvn being merged with
2985 some other operation. */
2986 else if (code == XOR && num_bits_set > 16
2987 && (count_insns_for_constant (remainder ^ 0xffffffff,
2989 (remainder ^ 0xffffffff))
2990 < count_insns_for_constant (remainder,
2991 find_best_start (remainder))))
2993 remainder ^= 0xffffffff;
3002   /* Now try to find a way of doing the job in either two or three
3003      instructions.
3004      We start by looking for the largest block of zeros that are aligned on
3005 a 2-bit boundary, we then fill up the temps, wrapping around to the
3006 top of the word when we drop off the bottom.
3007 In the worst case this code should produce no more than four insns.
3008 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3009 best place to start. */
3011   /* ??? Use thumb2 replicated constants when the high and low halfwords
3012      are the same.  */
3014 /* Now start emitting the insns. */
3015 i = find_best_start (remainder);
3022 if (remainder & (3 << (i - 2)))
3027 temp1 = remainder & ((0x0ff << end)
3028 | ((i < end) ? (0xff >> (32 - end)) : 0));
3029 remainder &= ~temp1;
3033 rtx new_src, temp1_rtx;
3035 if (code == SET || code == MINUS)
3037 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3038 if (can_invert && code != MINUS)
3043 if ((final_invert || remainder) && subtargets)
3044 new_src = gen_reg_rtx (mode);
3049 else if (can_negate)
3053 temp1 = trunc_int_for_mode (temp1, mode);
3054 temp1_rtx = GEN_INT (temp1);
3058 else if (code == MINUS)
3059 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3061 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3063 emit_constant_insn (cond,
3064 gen_rtx_SET (VOIDmode, new_src,
3074 else if (code == MINUS)
3080 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3090 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3091 gen_rtx_NOT (mode, source)));
3098 /* Canonicalize a comparison so that we are more likely to recognize it.
3099 This can be done for a few constant compares, where we can make the
3100 immediate value easier to load. */
3102 enum rtx_code
3103 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3104 {
3105 enum machine_mode mode;
3106 unsigned HOST_WIDE_INT i, maxval;
3108 mode = GET_MODE (*op0);
3109 if (mode == VOIDmode)
3110 mode = GET_MODE (*op1);
3112 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3114 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3115 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3116 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3117 for GTU/LEU in Thumb mode. */
3122   /* To keep things simple, always use the Cirrus cfcmp64 if it is
3123      available.  */
3124   if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3125     return code;
3127 if (code == GT || code == LE
3128 || (!TARGET_ARM && (code == GTU || code == LEU)))
3130       /* Missing comparison.  First try to use an available
3131          comparison.  */
3132 if (GET_CODE (*op1) == CONST_INT)
3140 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3142 *op1 = GEN_INT (i + 1);
3143 return code == GT ? GE : LT;
3148 if (i != ~((unsigned HOST_WIDE_INT) 0)
3149 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3151 *op1 = GEN_INT (i + 1);
3152 return code == GTU ? GEU : LTU;
3160 /* If that did not work, reverse the condition. */
3164 return swap_condition (code);
3170 /* Comparisons smaller than DImode. Only adjust comparisons against
3171 an out-of-range constant. */
3172 if (GET_CODE (*op1) != CONST_INT
3173 || const_ok_for_arm (INTVAL (*op1))
3174       || const_ok_for_arm (- INTVAL (*op1)))
3175     return code;
3188 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3190 *op1 = GEN_INT (i + 1);
3191 return code == GT ? GE : LT;
3198 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3200 *op1 = GEN_INT (i - 1);
3201 return code == GE ? GT : LE;
3207 if (i != ~((unsigned HOST_WIDE_INT) 0)
3208 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3210 *op1 = GEN_INT (i + 1);
3211 return code == GTU ? GEU : LTU;
3218 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3220 *op1 = GEN_INT (i - 1);
3221           return code == GEU ? GTU : LEU;
3222         }
3223       break;
3225     default:
3226       gcc_unreachable ();
3227     }
3229   return code;
3230 }
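/* Editorial note, a worked example of the adjustment above: in
   (x > 0x3fffff) the constant 0x3fffff is not a valid immediate (22
   consecutive ones), but 0x400000 is, so GT is rewritten as
   (x >= 0x400000); i.e. code GT becomes GE with *op1 = i + 1.  */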
3233 /* Define how to find the value returned by a function. */
3235 static rtx
3236 arm_function_value (const_tree type, const_tree func,
3237                     bool outgoing ATTRIBUTE_UNUSED)
3238 {
3239 enum machine_mode mode;
3240 int unsignedp ATTRIBUTE_UNUSED;
3241 rtx r ATTRIBUTE_UNUSED;
3243 mode = TYPE_MODE (type);
3245 if (TARGET_AAPCS_BASED)
3246 return aapcs_allocate_return_reg (mode, type, func);
3248 /* Promote integer types. */
3249 if (INTEGRAL_TYPE_P (type))
3250 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3252   /* Promote small structs returned in a register to full-word size
3253      for big-endian AAPCS.  */
3254 if (arm_return_in_msb (type))
3256 HOST_WIDE_INT size = int_size_in_bytes (type);
3257 if (size % UNITS_PER_WORD != 0)
3259 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3260 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3264   return LIBCALL_VALUE (mode);
3265 }
3267 static int
3268 libcall_eq (const void *p1, const void *p2)
3269 {
3270   return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3271 }
3273 static hashval_t
3274 libcall_hash (const void *p1)
3275 {
3276   return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3277 }
3279 static void
3280 add_libcall (htab_t htab, rtx libcall)
3281 {
3282   *htab_find_slot (htab, libcall, INSERT) = libcall;
3283 }
3285 static bool
3286 arm_libcall_uses_aapcs_base (const_rtx libcall)
3287 {
3288   static bool init_done = false;
3289   static htab_t libcall_htab;
3291   if (!init_done)
3292     {
3293       init_done = true;
3295       libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3296                                   NULL);
3297 add_libcall (libcall_htab,
3298 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3299 add_libcall (libcall_htab,
3300 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3301 add_libcall (libcall_htab,
3302 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3303 add_libcall (libcall_htab,
3304 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3306 add_libcall (libcall_htab,
3307 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3308 add_libcall (libcall_htab,
3309 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3310 add_libcall (libcall_htab,
3311 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3312 add_libcall (libcall_htab,
3313 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3315 add_libcall (libcall_htab,
3316 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3317 add_libcall (libcall_htab,
3318 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3319 add_libcall (libcall_htab,
3320 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3321 add_libcall (libcall_htab,
3322 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3323 add_libcall (libcall_htab,
3324 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3325 add_libcall (libcall_htab,
3326 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3327     }
3329   return libcall && htab_find (libcall_htab, libcall) != NULL;
3330 }
3332 rtx
3333 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3334 {
3335 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3336 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3338 /* The following libcalls return their result in integer registers,
3339 even though they return a floating point value. */
3340 if (arm_libcall_uses_aapcs_base (libcall))
3341 return gen_rtx_REG (mode, ARG_REGISTER(1));
3343     }
3345   return LIBCALL_VALUE (mode);
3346 }
3348 /* Determine the amount of memory needed to store the possible return
3349 registers of an untyped call. */
3350 int
3351 arm_apply_result_size (void)
3352 {
3353   int size = 16;
3355   if (TARGET_32BIT)
3356     {
3357       if (TARGET_HARD_FLOAT_ABI)
3358         {
3359           if (TARGET_VFP)
3360             size += 32;
3361           if (TARGET_FPA)
3362             size += 12;
3363           if (TARGET_MAVERICK)
3364             size += 8;
3365         }
3366       if (TARGET_IWMMXT_ABI)
3367         size += 8;
3368     }
3370   return size;
3371 }
3373 /* Decide whether TYPE should be returned in memory (true)
3374    or in a register (false).  FNTYPE is the type of the function making
3375    the call.  */
3376 static bool
3377 arm_return_in_memory (const_tree type, const_tree fntype)
3378 {
3379   HOST_WIDE_INT size;
3381 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3383   if (TARGET_AAPCS_BASED)
3384     {
3385       /* Simple, non-aggregate types (i.e. not including vectors and
3386          complex) are always returned in a register (or registers).
3386 complex) are always returned in a register (or registers).
3387 We don't care about which register here, so we can short-cut
3388 some of the detail. */
3389 if (!AGGREGATE_TYPE_P (type)
3390 && TREE_CODE (type) != VECTOR_TYPE
3391           && TREE_CODE (type) != COMPLEX_TYPE)
3392         return false;
3394       /* Any return value that is no larger than one word can be
3395          returned in r0.  */
3396       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3397         return false;
3399 /* Check any available co-processors to see if they accept the
3400 type as a register candidate (VFP, for example, can return
3401 some aggregates in consecutive registers). These aren't
3402 available if the call is variadic. */
3403       if (aapcs_select_return_coproc (type, fntype) >= 0)
3404         return false;
3406 /* Vector values should be returned using ARM registers, not
3407 memory (unless they're over 16 bytes, which will break since
3408 we only have four call-clobbered registers to play with). */
3409 if (TREE_CODE (type) == VECTOR_TYPE)
3410 return (size < 0 || size > (4 * UNITS_PER_WORD));
3412       /* The rest go in memory.  */
3413       return true;
3414     }
3416 if (TREE_CODE (type) == VECTOR_TYPE)
3417 return (size < 0 || size > (4 * UNITS_PER_WORD));
3419   if (!AGGREGATE_TYPE_P (type)
3420       && TREE_CODE (type) != VECTOR_TYPE)
3421     /* All simple types are returned in registers.  */
3422     return false;
3424   if (arm_abi != ARM_ABI_APCS)
3425     {
3426 /* ATPCS and later return aggregate types in memory only if they are
3427 larger than a word (or are variable size). */
3428       return (size < 0 || size > UNITS_PER_WORD);
3429     }
3431 /* For the arm-wince targets we choose to be compatible with Microsoft's
3432    ARM and Thumb compilers, which always return aggregates in memory.  */
3433 #ifndef ARM_WINCE
3434 /* All structures/unions bigger than one word are returned in memory.
3435 Also catch the case where int_size_in_bytes returns -1. In this case
3436 the aggregate is either huge or of variable size, and in either case
3437 we will want to return it via memory and not in a register. */
3438   if (size < 0 || size > UNITS_PER_WORD)
3439     return true;
3441   if (TREE_CODE (type) == RECORD_TYPE)
3442     {
3443       tree field;
3445 /* For a struct the APCS says that we only return in a register
3446 if the type is 'integer like' and every addressable element
3447 has an offset of zero. For practical purposes this means
3448 that the structure can have at most one non bit-field element
3449 and that this element must be the first one in the structure. */
3451 /* Find the first field, ignoring non FIELD_DECL things which will
3452 have been created by C++. */
3453 for (field = TYPE_FIELDS (type);
3454 field && TREE_CODE (field) != FIELD_DECL;
3455            field = DECL_CHAIN (field))
3456         continue;
3458       if (field == NULL)
3459         return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
3461 /* Check that the first field is valid for returning in a register. */
3463 /* ... Floats are not allowed */
3464 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3467 /* ... Aggregates that are not themselves valid for returning in
3468 a register are not allowed. */
3469       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3470         return true;
3472 /* Now check the remaining fields, if any. Only bitfields are allowed,
3473 since they are not addressable. */
3474       for (field = DECL_CHAIN (field);
3475            field;
3476            field = DECL_CHAIN (field))
3477         {
3478           if (TREE_CODE (field) != FIELD_DECL)
3479             continue;
3481           if (!DECL_BIT_FIELD_TYPE (field))
3482             return true;
3483         }
3485       return false;
3486     }
3488   if (TREE_CODE (type) == UNION_TYPE)
3489     {
3490       tree field;
3492       /* Unions can be returned in registers if every element is
3493          integral, or can be returned in an integer register.  */
3494       for (field = TYPE_FIELDS (type);
3495            field;
3496            field = DECL_CHAIN (field))
3497         {
3498           if (TREE_CODE (field) != FIELD_DECL)
3499             continue;
3501           if (FLOAT_TYPE_P (TREE_TYPE (field)))
3502             return true;
3504           if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3505             return true;
3506         }
3508       return false;
3509     }
3510 #endif /* not ARM_WINCE */
3512   /* Return all other types in memory.  */
3513   return true;
3514 }
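/* Editorial note, for illustration: under the APCS rules above,
   'struct { int x; }' is integer-like and comes back in r0, whereas
   'struct { int x, y; }' is larger than a word and is returned in
   memory via a hidden address argument.  */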
3516 /* Indicate whether or not words of a double are in big-endian order. */
3518 int
3519 arm_float_words_big_endian (void)
3520 {
3521   if (TARGET_MAVERICK)
3522     return 0;
3524   /* For FPA, float words are always big-endian.  For VFP, float words
3525      follow the memory system mode.  */
3527   if (TARGET_FPA)
3528     return 1;
3532   if (TARGET_VFP)
3533     return (TARGET_BIG_END ? 1 : 0);
3535   return 1;
3536 }
3538 const struct pcs_attribute_arg
3539 {
3540   const char *arg;
3541   enum arm_pcs value;
3542 } pcs_attribute_args[] =
3543   {
3544 {"aapcs", ARM_PCS_AAPCS},
3545 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3547 /* We could recognize these, but changes would be needed elsewhere
3548 * to implement them. */
3549 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3550 {"atpcs", ARM_PCS_ATPCS},
3551 {"apcs", ARM_PCS_APCS},
3553     {NULL, ARM_PCS_UNKNOWN}
3554   };
3556 static enum arm_pcs
3557 arm_pcs_from_attribute (tree attr)
3558 {
3559   const struct pcs_attribute_arg * ptr;
3560   const char * arg;
3562 /* Get the value of the argument. */
3563 if (TREE_VALUE (attr) == NULL_TREE
3564 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3565 return ARM_PCS_UNKNOWN;
3567 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3569 /* Check it against the list of known arguments. */
3570 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3571     if (streq (arg, ptr->arg))
3572       return ptr->value;
3574   /* An unrecognized PCS variant.  */
3575   return ARM_PCS_UNKNOWN;
3576 }
3578 /* Get the PCS variant to use for this call. TYPE is the function's type
3579    specification, DECL is the specific declaration.  DECL may be null if
3580 the call could be indirect or if this is a library call. */
3581 static enum arm_pcs
3582 arm_get_pcs_model (const_tree type, const_tree decl)
3583 {
3584 bool user_convention = false;
3585   enum arm_pcs user_pcs = arm_pcs_default;
3586   tree attr;
3590   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3591   if (attr)
3592     {
3593       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3594       user_convention = true;
3595     }
3597 if (TARGET_AAPCS_BASED)
3599 /* Detect varargs functions. These always use the base rules
3600 (no argument is ever a candidate for a co-processor
3601          register).  */
3602       bool base_rules = stdarg_p (type);
3604 if (user_convention)
3606 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3607 sorry ("non-AAPCS derived PCS variant");
3608 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3609 error ("variadic functions must use the base AAPCS variant");
3613 return ARM_PCS_AAPCS;
3614 else if (user_convention)
3616 else if (decl && flag_unit_at_a_time)
3618 /* Local functions never leak outside this compilation unit,
3619          so we are free to use whatever conventions are
3620          appropriate.  */
3621 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3622 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3623       if (i && i->local)
3624         return ARM_PCS_AAPCS_LOCAL;
3625     }
3627 else if (user_convention && user_pcs != arm_pcs_default)
3628 sorry ("PCS variant");
3630 /* For everything else we use the target's default. */
3631   return arm_pcs_default;
3632 }
3636 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3637 const_tree fntype ATTRIBUTE_UNUSED,
3638 rtx libcall ATTRIBUTE_UNUSED,
3639 const_tree fndecl ATTRIBUTE_UNUSED)
3641 /* Record the unallocated VFP registers. */
3642 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3643 pcum->aapcs_vfp_reg_alloc = 0;
3646 /* Walk down the type tree of TYPE counting consecutive base elements.
3647 If *MODEP is VOIDmode, then set it to the first valid floating point
3648 type. If a non-floating point type is found, or if a floating point
3649 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3650 otherwise return the count in the sub-tree. */
3652 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3654 enum machine_mode mode;
3657 switch (TREE_CODE (type))
3660 mode = TYPE_MODE (type);
3661 if (mode != DFmode && mode != SFmode)
3664 if (*modep == VOIDmode)
3673 mode = TYPE_MODE (TREE_TYPE (type));
3674 if (mode != DFmode && mode != SFmode)
3677 if (*modep == VOIDmode)
3686 /* Use V2SImode and V4SImode as representatives of all 64-bit
3687 and 128-bit vector types, whether or not those modes are
3688 supported with the present options. */
3689 size = int_size_in_bytes (type);
3702 if (*modep == VOIDmode)
3705 /* Vector modes are considered to be opaque: two vectors are
3706 equivalent for the purposes of being homogeneous aggregates
3707 if they are the same size. */
3716 tree index = TYPE_DOMAIN (type);
3718 /* Can't handle incomplete types. */
3719         if (!COMPLETE_TYPE_P (type))
3720           return -1;
3722 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3725 || !TYPE_MAX_VALUE (index)
3726 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3727 || !TYPE_MIN_VALUE (index)
3728 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3732 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3733 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3735 /* There must be no padding. */
3736 if (!host_integerp (TYPE_SIZE (type), 1)
3737 || (tree_low_cst (TYPE_SIZE (type), 1)
3738 != count * GET_MODE_BITSIZE (*modep)))
3750 /* Can't handle incomplete types. */
3751         if (!COMPLETE_TYPE_P (type))
3752           return -1;
3754 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3756 if (TREE_CODE (field) != FIELD_DECL)
3759 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3765 /* There must be no padding. */
3766 if (!host_integerp (TYPE_SIZE (type), 1)
3767 || (tree_low_cst (TYPE_SIZE (type), 1)
3768 != count * GET_MODE_BITSIZE (*modep)))
3775 case QUAL_UNION_TYPE:
3777 /* These aren't very interesting except in a degenerate case. */
3782 /* Can't handle incomplete types. */
3783         if (!COMPLETE_TYPE_P (type))
3784           return -1;
3786 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3788 if (TREE_CODE (field) != FIELD_DECL)
3791 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3794 count = count > sub_count ? count : sub_count;
3797 /* There must be no padding. */
3798 if (!host_integerp (TYPE_SIZE (type), 1)
3799 || (tree_low_cst (TYPE_SIZE (type), 1)
3800             != count * GET_MODE_BITSIZE (*modep)))
3801           return -1;
3803         return count;
3804       }
3806     default:
3807       break;
3808     }
3810   return -1;
3811 }
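/* Editorial note, for illustration: under the AAPCS VFP variant the
   walk above recognizes homogeneous aggregates, e.g.

        struct hfa { double a, b, c; };

   yields three consecutive DFmode elements (eligible for d0-d2), while
   adding a member of any other type makes the walk return -1 and the
   struct falls back to the core-register/stack rules.  */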
3813 /* Return true if PCS_VARIANT should use VFP registers. */
3815 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3817 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3819 static bool seen_thumb1_vfp = false;
3821 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3823 sorry ("Thumb-1 hard-float VFP ABI");
3824 /* sorry() is not immediately fatal, so only display this once. */
3825 seen_thumb1_vfp = true;
3831 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3834 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
3835 (TARGET_VFP_DOUBLE || !is_double));
3839 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3840 enum machine_mode mode, const_tree type,
3841 enum machine_mode *base_mode, int *count)
3843 enum machine_mode new_mode = VOIDmode;
3845 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3846 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3847 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3852 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3855 new_mode = (mode == DCmode ? DFmode : SFmode);
3857 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3859 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3861 if (ag_count > 0 && ag_count <= 4)
3870 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3873 *base_mode = new_mode;
3878 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3879 enum machine_mode mode, const_tree type)
3881 int count ATTRIBUTE_UNUSED;
3882 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3884 if (!use_vfp_abi (pcs_variant, false))
3886 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3891 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3894 if (!use_vfp_abi (pcum->pcs_variant, false))
3897 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3898 &pcum->aapcs_vfp_rmode,
3899 &pcum->aapcs_vfp_rcount);
3903 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3904 const_tree type ATTRIBUTE_UNUSED)
3906 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3907 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3910 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3911 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3913 pcum->aapcs_vfp_reg_alloc = mask << regno;
3914 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3917 int rcount = pcum->aapcs_vfp_rcount;
3919 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3923 /* Avoid using unsupported vector modes. */
3924               if (rmode == V2SImode)
3925                 rmode = DImode;
3926               else if (rmode == V4SImode)
3927                 {
3928                   rmode = DImode;
3929                   rcount *= 2;
3930                   rshift /= 2;
3931                 }
3933 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3934 for (i = 0; i < rcount; i++)
3936 rtx tmp = gen_rtx_REG (rmode,
3937 FIRST_VFP_REGNUM + regno + i * rshift);
3938 tmp = gen_rtx_EXPR_LIST
3940 GEN_INT (i * GET_MODE_SIZE (rmode)));
3941 XVECEXP (par, 0, i) = tmp;
3944 pcum->aapcs_reg = par;
3947         pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3948       return true;
3949     }
3950   return false;
3951 }
3954 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3955 enum machine_mode mode,
3956 const_tree type ATTRIBUTE_UNUSED)
3958 if (!use_vfp_abi (pcs_variant, false))
3961 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3964 enum machine_mode ag_mode;
3969 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3974 if (ag_mode == V2SImode)
3976 else if (ag_mode == V4SImode)
3982 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
3983 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3984 for (i = 0; i < count; i++)
3986 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3987 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3988 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3989 XVECEXP (par, 0, i) = tmp;
3995   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
3996 }
3999 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4000 enum machine_mode mode ATTRIBUTE_UNUSED,
4001 const_tree type ATTRIBUTE_UNUSED)
4003 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4004   pcum->aapcs_vfp_reg_alloc = 0;
4005 }
4008 #define AAPCS_CP(X) \
4010 aapcs_ ## X ## _cum_init, \
4011 aapcs_ ## X ## _is_call_candidate, \
4012 aapcs_ ## X ## _allocate, \
4013 aapcs_ ## X ## _is_return_candidate, \
4014 aapcs_ ## X ## _allocate_return_reg, \
4015 aapcs_ ## X ## _advance \
4018 /* Table of co-processors that can be used to pass arguments in
4019    registers.  Ideally no argument should be a candidate for more than
4020    one co-processor table entry, but the table is processed in order
4021    and stops after the first match.  If that entry then fails to put
4022    the argument into a co-processor register, the argument will go on
4023    the stack.  */
4024 static struct
4025 {
4026 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4027 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4029 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4030 BLKmode) is a candidate for this co-processor's registers; this
4031 function should ignore any position-dependent state in
4032 CUMULATIVE_ARGS and only use call-type dependent information. */
4033 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4035 /* Return true if the argument does get a co-processor register; it
4036 should set aapcs_reg to an RTX of the register allocated as is
4037 required for a return from FUNCTION_ARG. */
4038 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4040   /* Return true if a result of mode MODE (or type TYPE if MODE is
4041      BLKmode) can be returned in this co-processor's registers.  */
4042 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4044 /* Allocate and return an RTX element to hold the return type of a
4045 call, this routine must not fail and will only be called if
4046 is_return_candidate returned true with the same parameters. */
4047 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4049 /* Finish processing this argument and prepare to start processing
4051 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4052 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4053   {
4054     AAPCS_CP(vfp)
4055   };
4057 #undef AAPCS_CP
4059 static int
4060 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4065 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4066     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4067       return i;
4069   return -1;
4070 }
4072 static int
4073 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4074 {
4075 /* We aren't passed a decl, so we can't check that a call is local.
4076 However, it isn't clear that that would be a win anyway, since it
4077 might limit some tail-calling opportunities. */
4078   enum arm_pcs pcs_variant;
4080   if (fntype)
4081     {
4082       const_tree fndecl = NULL_TREE;
4084       if (TREE_CODE (fntype) == FUNCTION_DECL)
4085         {
4086           fndecl = fntype;
4087           fntype = TREE_TYPE (fntype);
4088         }
4090       pcs_variant = arm_get_pcs_model (fntype, fndecl);
4091     }
4092   else
4093     pcs_variant = arm_pcs_default;
4095 if (pcs_variant != ARM_PCS_AAPCS)
4099 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4100       if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4101                                                       TYPE_MODE (type),
4102                                                       type))
4103         return i;
4104     }
4105   return -1;
4106 }
4108 static rtx
4109 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4112 /* We aren't passed a decl, so we can't check that a call is local.
4113 However, it isn't clear that that would be a win anyway, since it
4114 might limit some tail-calling opportunities. */
4115 enum arm_pcs pcs_variant;
4116 int unsignedp ATTRIBUTE_UNUSED;
4118   if (fntype)
4119     {
4120       const_tree fndecl = NULL_TREE;
4122       if (TREE_CODE (fntype) == FUNCTION_DECL)
4123         {
4124           fndecl = fntype;
4125           fntype = TREE_TYPE (fntype);
4126         }
4128       pcs_variant = arm_get_pcs_model (fntype, fndecl);
4129     }
4130   else
4131     pcs_variant = arm_pcs_default;
4133 /* Promote integer types. */
4134 if (type && INTEGRAL_TYPE_P (type))
4135 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4137 if (pcs_variant != ARM_PCS_AAPCS)
4141 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4142       if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4143                                                       type))
4144         return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4145                                                             mode, type);
4146     }
4148   /* Promote small structs returned in a register to full-word size
4149 for big-endian AAPCS. */
4150 if (type && arm_return_in_msb (type))
4152 HOST_WIDE_INT size = int_size_in_bytes (type);
4153 if (size % UNITS_PER_WORD != 0)
4155 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4156 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4160 return gen_rtx_REG (mode, R0_REGNUM);
4163 static rtx
4164 aapcs_libcall_value (enum machine_mode mode)
4165 {
4166   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4167 }
4169 /* Lay out a function argument using the AAPCS rules. The rule
4170 numbers referred to here are those in the AAPCS. */
4171 static void
4172 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4173                   const_tree type, bool named)
4174 {
4175   int nregs, nregs2;
4176   int ncrn;
4178 /* We only need to do this once per argument. */
4179   if (pcum->aapcs_arg_processed)
4180     return;
4182 pcum->aapcs_arg_processed = true;
4184 /* Special case: if named is false then we are handling an incoming
4185      anonymous argument which is on the stack.  */
4186   if (!named)
4187     return;
4189 /* Is this a potential co-processor register candidate? */
4190 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4192 int slot = aapcs_select_call_coproc (pcum, mode, type);
4193 pcum->aapcs_cprc_slot = slot;
4195 /* We don't have to apply any of the rules from part B of the
4196 preparation phase, these are handled elsewhere in the
4201 /* A Co-processor register candidate goes either in its own
4202 class of registers or on the stack. */
4203 if (!pcum->aapcs_cprc_failed[slot])
4205 /* C1.cp - Try to allocate the argument to co-processor
4207 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4210 /* C2.cp - Put the argument on the stack and note that we
4211 can't assign any more candidates in this slot. We also
4212 need to note that we have allocated stack space, so that
4213 we won't later try to split a non-cprc candidate between
4214 core registers and the stack. */
4215 pcum->aapcs_cprc_failed[slot] = true;
4216 pcum->can_split = false;
4219 /* We didn't get a register, so this argument goes on the
4221 gcc_assert (pcum->can_split == false);
4226 /* C3 - For double-word aligned arguments, round the NCRN up to the
4227 next even number. */
4228 ncrn = pcum->aapcs_ncrn;
4229 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4232 nregs = ARM_NUM_REGS2(mode, type);
4234 /* Sigh, this test should really assert that nregs > 0, but a GCC
4235 extension allows empty structs and then gives them empty size; it
4236 then allows such a structure to be passed by value. For some of
4237 the code below we have to pretend that such an argument has
4238 non-zero size so that we 'locate' it correctly either in
4239 registers or on the stack. */
4240 gcc_assert (nregs >= 0);
4242 nregs2 = nregs ? nregs : 1;
4244 /* C4 - Argument fits entirely in core registers. */
4245 if (ncrn + nregs2 <= NUM_ARG_REGS)
4247 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4248 pcum->aapcs_next_ncrn = ncrn + nregs;
4252 /* C5 - Some core registers left and there are no arguments already
4253 on the stack: split this argument between the remaining core
4254 registers and the stack. */
4255 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4257 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4258 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4259 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4263 /* C6 - NCRN is set to 4. */
4264 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4266 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
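/* A worked example of the rules above (an illustrative sketch, not
original commentary). For a prototype such as

void f (int a, double b, int c);

on a doubleword-aligned AAPCS target: A is allocated to r0 (C4); the
NCRN is then rounded up from 1 to 2 for the doubleword-aligned B (C3),
so B occupies r2/r3 (C4); C finds no core registers left and goes on
the stack (C6-C8). */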
4270 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4271 for a call to a function whose data type is FNTYPE.
4272 For a library call, FNTYPE is NULL. */
4274 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4276 tree fndecl ATTRIBUTE_UNUSED)
4278 /* Long call handling. */
4280 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4282 pcum->pcs_variant = arm_pcs_default;
4284 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4286 if (arm_libcall_uses_aapcs_base (libname))
4287 pcum->pcs_variant = ARM_PCS_AAPCS;
4289 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4290 pcum->aapcs_reg = NULL_RTX;
4291 pcum->aapcs_partial = 0;
4292 pcum->aapcs_arg_processed = false;
4293 pcum->aapcs_cprc_slot = -1;
4294 pcum->can_split = true;
4296 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4300 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4302 pcum->aapcs_cprc_failed[i] = false;
4303 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4311 /* On the ARM, the offset starts at 0. */
4313 pcum->iwmmxt_nregs = 0;
4314 pcum->can_split = true;
4316 /* Varargs vectors are treated the same as long long.
4317 named_count avoids having to change the way arm handles 'named'. */
4318 pcum->named_count = 0;
4321 if (TARGET_REALLY_IWMMXT && fntype)
4325 for (fn_arg = TYPE_ARG_TYPES (fntype);
4327 fn_arg = TREE_CHAIN (fn_arg))
4328 pcum->named_count += 1;
4330 if (! pcum->named_count)
4331 pcum->named_count = INT_MAX;
4336 /* Return true if mode/type need doubleword alignment. */
4338 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4340 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4341 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4345 /* Determine where to put an argument to a function.
4346 Value is zero to push the argument on the stack,
4347 or a hard register in which to store the argument.
4349 MODE is the argument's machine mode.
4350 TYPE is the data type of the argument (as a tree).
4351 This is null for libcalls where that information may
4352 not be available.
4353 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4354 the preceding args and about the function being called.
4355 NAMED is nonzero if this argument is a named parameter
4356 (otherwise it is an extra parameter matching an ellipsis).
4358 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4359 other arguments are passed on the stack. If (NAMED == 0) (which happens
4360 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4361 defined), say it is passed on the stack (function_prologue will
4362 indeed make it pass on the stack if necessary). */
4365 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4366 const_tree type, bool named)
4370 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4371 a call insn (op3 of a call_value insn). */
4372 if (mode == VOIDmode)
4375 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4377 aapcs_layout_arg (pcum, mode, type, named);
4378 return pcum->aapcs_reg;
4381 /* Varargs vectors are treated the same as long long.
4382 named_count avoids having to change the way arm handles 'named'. */
4383 if (TARGET_IWMMXT_ABI
4384 && arm_vector_mode_supported_p (mode)
4385 && pcum->named_count > pcum->nargs + 1)
4387 if (pcum->iwmmxt_nregs <= 9)
4388 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4391 pcum->can_split = false;
4396 /* Put doubleword aligned quantities in even register pairs. */
4397 if (pcum->nregs & 1
4398 && ARM_DOUBLEWORD_ALIGN
4399 && arm_needs_doubleword_align (mode, type))
4402 /* Only allow splitting an arg between regs and memory if all preceding
4403 args were allocated to regs. For args passed by reference we only count
4404 the reference pointer. */
4405 if (pcum->can_split)
4408 nregs = ARM_NUM_REGS2 (mode, type);
4410 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4413 return gen_rtx_REG (mode, pcum->nregs);
4417 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4419 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4420 ? DOUBLEWORD_ALIGNMENT
4425 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4426 tree type, bool named)
4428 int nregs = pcum->nregs;
4430 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4432 aapcs_layout_arg (pcum, mode, type, named);
4433 return pcum->aapcs_partial;
4436 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4439 if (NUM_ARG_REGS > nregs
4440 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4442 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4447 /* Update the data in PCUM to advance over an argument
4448 of mode MODE and data type TYPE.
4449 (TYPE is null for libcalls where that information may not be available.) */
4452 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4453 const_tree type, bool named)
4455 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4457 aapcs_layout_arg (pcum, mode, type, named);
4459 if (pcum->aapcs_cprc_slot >= 0)
4461 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4463 pcum->aapcs_cprc_slot = -1;
4466 /* Generic stuff. */
4467 pcum->aapcs_arg_processed = false;
4468 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4469 pcum->aapcs_reg = NULL_RTX;
4470 pcum->aapcs_partial = 0;
4475 if (arm_vector_mode_supported_p (mode)
4476 && pcum->named_count > pcum->nargs
4477 && TARGET_IWMMXT_ABI)
4478 pcum->iwmmxt_nregs += 1;
4480 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4484 /* Variable sized types are passed by reference. This is a GCC
4485 extension to the ARM ABI. */
4488 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4489 enum machine_mode mode ATTRIBUTE_UNUSED,
4490 const_tree type, bool named ATTRIBUTE_UNUSED)
4492 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
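/* An illustrative sketch (hypothetical user code, using the GNU
variable-length-array-in-struct extension):

void f (int n)
{
struct s { int data[n]; } obj; // TYPE_SIZE is not an INTEGER_CST
use (obj);                     // by value in the source
}

Here USE is a hypothetical callee; OBJ's size is unknown at compile
time, so the call passes it by reference under the hood. */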
4495 /* Encode the current state of the #pragma [no_]long_calls. */
4498 OFF, /* No #pragma [no_]long_calls is in effect. */
4499 LONG, /* #pragma long_calls is in effect. */
4500 SHORT /* #pragma no_long_calls is in effect. */
4503 static arm_pragma_enum arm_pragma_long_calls = OFF;
4506 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4508 arm_pragma_long_calls = LONG;
4512 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4514 arm_pragma_long_calls = SHORT;
4518 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4520 arm_pragma_long_calls = OFF;
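/* A sketch of how these pragmas appear in user code (illustrative,
not taken from a real test case):

#pragma long_calls
extern void f1 (void); // calls to f1 become long calls
#pragma no_long_calls
extern void f2 (void); // calls to f2 stay short
#pragma long_calls_off
extern void f3 (void); // back to the command-line default
*/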
4523 /* Handle an attribute requiring a FUNCTION_DECL;
4524 arguments as in struct attribute_spec.handler. */
4526 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4527 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4529 if (TREE_CODE (*node) != FUNCTION_DECL)
4531 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4532 name);
4533 *no_add_attrs = true;
4539 /* Handle an "interrupt" or "isr" attribute;
4540 arguments as in struct attribute_spec.handler. */
4542 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4547 if (TREE_CODE (*node) != FUNCTION_DECL)
4549 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4550 name);
4551 *no_add_attrs = true;
4553 /* FIXME: the argument, if any, is checked for type attributes;
4554 should it be checked for decl ones? */
4558 if (TREE_CODE (*node) == FUNCTION_TYPE
4559 || TREE_CODE (*node) == METHOD_TYPE)
4561 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4563 warning (OPT_Wattributes, "%qE attribute ignored",
4564 name);
4565 *no_add_attrs = true;
4568 else if (TREE_CODE (*node) == POINTER_TYPE
4569 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4570 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4571 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4573 *node = build_variant_type_copy (*node);
4574 TREE_TYPE (*node) = build_type_attribute_variant
4576 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4577 *no_add_attrs = true;
4581 /* Possibly pass this attribute on from the type to a decl. */
4582 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4583 | (int) ATTR_FLAG_FUNCTION_NEXT
4584 | (int) ATTR_FLAG_ARRAY_NEXT))
4586 *no_add_attrs = true;
4587 return tree_cons (name, args, NULL_TREE);
4591 warning (OPT_Wattributes, "%qE attribute ignored",
4592 name);
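/* A sketch of the usage this handler accepts (illustrative user code):

void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

The optional string argument names the interrupt kind and is validated
by arm_isr_value above. */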
4600 /* Handle a "pcs" attribute; arguments as in struct
4601 attribute_spec.handler. */
4603 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4604 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4606 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4608 warning (OPT_Wattributes, "%qE attribute ignored", name);
4609 *no_add_attrs = true;
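/* A sketch of the usage this handler accepts (illustrative user code):

double (*fp) (double) __attribute__ ((pcs ("aapcs")));

A variant name that arm_pcs_from_attribute does not recognize comes
back as ARM_PCS_UNKNOWN and the attribute is ignored with a
warning. */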
4614 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4615 /* Handle the "notshared" attribute. This attribute is another way of
4616 requesting hidden visibility. ARM's compiler supports
4617 "__declspec(notshared)"; we support the same thing via an
4621 arm_handle_notshared_attribute (tree *node,
4622 tree name ATTRIBUTE_UNUSED,
4623 tree args ATTRIBUTE_UNUSED,
4624 int flags ATTRIBUTE_UNUSED,
4627 tree decl = TYPE_NAME (*node);
4631 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4632 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4633 *no_add_attrs = false;
4639 /* Return 0 if the attributes for two types are incompatible, 1 if they
4640 are compatible, and 2 if they are nearly compatible (which causes a
4641 warning to be generated). */
4643 arm_comp_type_attributes (const_tree type1, const_tree type2)
4647 /* Check for mismatch of non-default calling convention. */
4648 if (TREE_CODE (type1) != FUNCTION_TYPE)
4651 /* Check for mismatched call attributes. */
4652 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4653 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4654 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4655 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4657 /* Only bother to check if an attribute is defined. */
4658 if (l1 | l2 | s1 | s2)
4660 /* If one type has an attribute, the other must have the same attribute. */
4661 if ((l1 != l2) || (s1 != s2))
4664 /* Disallow mixed attributes. */
4665 if ((l1 & s2) || (l2 & s1))
4669 /* Check for mismatched ISR attribute. */
4670 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4672 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4673 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4675 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4682 /* Assigns default attributes to a newly defined type. This is used to
4683 set short_call/long_call attributes for function types of
4684 functions defined inside corresponding #pragma scopes. */
4686 arm_set_default_type_attributes (tree type)
4688 /* Add __attribute__ ((long_call)) to all functions when
4689 inside #pragma long_calls, or __attribute__ ((short_call))
4690 when inside #pragma no_long_calls. */
4691 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4693 tree type_attr_list, attr_name;
4694 type_attr_list = TYPE_ATTRIBUTES (type);
4696 if (arm_pragma_long_calls == LONG)
4697 attr_name = get_identifier ("long_call");
4698 else if (arm_pragma_long_calls == SHORT)
4699 attr_name = get_identifier ("short_call");
4703 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4704 TYPE_ATTRIBUTES (type) = type_attr_list;
4708 /* Return true if DECL is known to be linked into section SECTION. */
4711 arm_function_in_section_p (tree decl, section *section)
4713 /* We can only be certain about functions defined in the same
4714 compilation unit. */
4715 if (!TREE_STATIC (decl))
4718 /* Make sure that SYMBOL always binds to the definition in this
4719 compilation unit. */
4720 if (!targetm.binds_local_p (decl))
4723 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4724 if (!DECL_SECTION_NAME (decl))
4726 /* Make sure that we will not create a unique section for DECL. */
4727 if (flag_function_sections || DECL_ONE_ONLY (decl))
4731 return function_section (decl) == section;
4734 /* Return nonzero if a 32-bit "long_call" should be generated for
4735 a call from the current function to DECL. We generate a long_call
4738 a. has an __attribute__ ((long_call))
4739 or b. is within the scope of a #pragma long_calls
4740 or c. the -mlong-calls command line switch has been specified
4742 However we do not generate a long call if the function:
4744 d. has an __attribute__ ((short_call))
4745 or e. is inside the scope of a #pragma no_long_calls
4746 or f. is defined in the same section as the current function. */
4749 arm_is_long_call_p (tree decl)
4754 return TARGET_LONG_CALLS;
4756 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4757 if (lookup_attribute ("short_call", attrs))
4760 /* For "f", be conservative, and only cater for cases in which the
4761 whole of the current function is placed in the same section. */
4762 if (!flag_reorder_blocks_and_partition
4763 && TREE_CODE (decl) == FUNCTION_DECL
4764 && arm_function_in_section_p (decl, current_function_section ()))
4767 if (lookup_attribute ("long_call", attrs))
4770 return TARGET_LONG_CALLS;
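/* A sketch exercising rules (a) and (d) above (illustrative user
code):

void far_func (void) __attribute__ ((long_call));
void near_func (void) __attribute__ ((short_call));

Calls to far_func go via a full 32-bit address loaded into a register;
calls to near_func use a plain BL even under -mlong-calls. */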
4773 /* Return nonzero if it is ok to make a tail-call to DECL. */
4775 arm_function_ok_for_sibcall (tree decl, tree exp)
4777 unsigned long func_type;
4779 if (cfun->machine->sibcall_blocked)
4782 /* Never tailcall something for which we have no decl, or if we
4783 are generating code for Thumb-1. */
4784 if (decl == NULL || TARGET_THUMB1)
4787 /* The PIC register is live on entry to VxWorks PLT entries, so we
4788 must make the call before restoring the PIC register. */
4789 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4792 /* Cannot tail-call to long calls, since these are out of range of
4793 a branch instruction. */
4794 if (arm_is_long_call_p (decl))
4797 /* If we are interworking and the function is not declared static
4798 then we can't tail-call it unless we know that it exists in this
4799 compilation unit (since it might be a Thumb routine). */
4800 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4803 func_type = arm_current_func_type ();
4804 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4805 if (IS_INTERRUPT (func_type))
4808 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4810 /* Check that the return value locations are the same. For
4811 example that we aren't returning a value from the sibling in
4812 a VFP register but then need to transfer it to a core
4813 register. */
4816 a = arm_function_value (TREE_TYPE (exp), decl, false);
4817 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4819 if (!rtx_equal_p (a, b))
4823 /* Never tailcall if function may be called with a misaligned SP. */
4824 if (IS_STACKALIGN (func_type))
4827 /* Everything else is ok. */
4832 /* Addressing mode support functions. */
4834 /* Return nonzero if X is a legitimate immediate operand when compiling
4835 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4837 legitimate_pic_operand_p (rtx x)
4839 if (GET_CODE (x) == SYMBOL_REF
4840 || (GET_CODE (x) == CONST
4841 && GET_CODE (XEXP (x, 0)) == PLUS
4842 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4848 /* Record that the current function needs a PIC register. Initialize
4849 cfun->machine->pic_reg if we have not already done so. */
4852 require_pic_register (void)
4854 /* A lot of the logic here is made obscure by the fact that this
4855 routine gets called as part of the rtx cost estimation process.
4856 We don't want those calls to affect any assumptions about the real
4857 function; and further, we can't call entry_of_function() until we
4858 start the real expansion process. */
4859 if (!crtl->uses_pic_offset_table)
4861 gcc_assert (can_create_pseudo_p ());
4862 if (arm_pic_register != INVALID_REGNUM)
4864 if (!cfun->machine->pic_reg)
4865 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4867 /* Play games to avoid marking the function as needing pic
4868 if we are being called as part of the cost-estimation
4869 process. */
4870 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4871 crtl->uses_pic_offset_table = 1;
4877 if (!cfun->machine->pic_reg)
4878 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4880 /* Play games to avoid marking the function as needing pic
4881 if we are being called as part of the cost-estimation
4882 process. */
4883 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4885 crtl->uses_pic_offset_table = 1;
4888 arm_load_pic_register (0UL);
4893 for (insn = seq; insn; insn = NEXT_INSN (insn))
4895 INSN_LOCATOR (insn) = prologue_locator;
4897 /* We can be called during expansion of PHI nodes, where
4898 we can't yet emit instructions directly in the final
4899 insn stream. Queue the insns on the entry edge, they will
4900 be committed after everything else is expanded. */
4901 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4908 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4910 if (GET_CODE (orig) == SYMBOL_REF
4911 || GET_CODE (orig) == LABEL_REF)
4917 gcc_assert (can_create_pseudo_p ());
4918 reg = gen_reg_rtx (Pmode);
4921 /* VxWorks does not impose a fixed gap between segments; the run-time
4922 gap can be different from the object-file gap. We therefore can't
4923 use GOTOFF unless we are absolutely sure that the symbol is in the
4924 same segment as the GOT. Unfortunately, the flexibility of linker
4925 scripts means that we can't be sure of that in general, so assume
4926 that GOTOFF is never valid on VxWorks. */
4927 if ((GET_CODE (orig) == LABEL_REF
4928 || (GET_CODE (orig) == SYMBOL_REF &&
4929 SYMBOL_REF_LOCAL_P (orig)))
4931 && !TARGET_VXWORKS_RTP)
4932 insn = arm_pic_static_addr (orig, reg);
4938 /* If this function doesn't have a pic register, create one now. */
4939 require_pic_register ();
4941 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
4943 /* Make the MEM as close to a constant as possible. */
4944 mem = SET_SRC (pat);
4945 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
4946 MEM_READONLY_P (mem) = 1;
4947 MEM_NOTRAP_P (mem) = 1;
4949 insn = emit_insn (pat);
4952 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4953 by loop. */
4954 set_unique_reg_note (insn, REG_EQUAL, orig);
4958 else if (GET_CODE (orig) == CONST)
4962 if (GET_CODE (XEXP (orig, 0)) == PLUS
4963 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4966 /* Handle the case where we have: const (UNSPEC_TLS). */
4967 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4968 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4971 /* Handle the case where we have:
4972 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4973 constant. */
4974 if (GET_CODE (XEXP (orig, 0)) == PLUS
4975 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4976 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4978 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4984 gcc_assert (can_create_pseudo_p ());
4985 reg = gen_reg_rtx (Pmode);
4988 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4990 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4991 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4992 base == reg ? 0 : reg);
4994 if (GET_CODE (offset) == CONST_INT)
4996 /* The base register doesn't really matter; we only want to
4997 test the index for the appropriate mode. */
4998 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5000 gcc_assert (can_create_pseudo_p ());
5001 offset = force_reg (Pmode, offset);
5004 if (GET_CODE (offset) == CONST_INT)
5005 return plus_constant (base, INTVAL (offset));
5008 if (GET_MODE_SIZE (mode) > 4
5009 && (GET_MODE_CLASS (mode) == MODE_INT
5010 || TARGET_SOFT_FLOAT))
5012 emit_insn (gen_addsi3 (reg, base, offset));
5016 return gen_rtx_PLUS (Pmode, base, offset);
5023 /* Find a spare register to use during the prolog of a function. */
5026 thumb_find_work_register (unsigned long pushed_regs_mask)
5030 /* Check the argument registers first as these are call-used. The
5031 register allocation order means that sometimes r3 might be used
5032 but earlier argument registers might not, so check them all. */
5033 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5034 if (!df_regs_ever_live_p (reg))
5037 /* Before going on to check the call-saved registers we can try a couple
5038 more ways of deducing that r3 is available. The first is when we are
5039 pushing anonymous arguments onto the stack and we have fewer than 4
5040 registers' worth of fixed arguments (*). In this case r3 will be part of
5041 the variable argument list and so we can be sure that it will be
5042 pushed right at the start of the function. Hence it will be available
5043 for the rest of the prologue.
5044 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5045 if (cfun->machine->uses_anonymous_args
5046 && crtl->args.pretend_args_size > 0)
5047 return LAST_ARG_REGNUM;
5049 /* The other case is when we have fixed arguments but fewer than 4
5050 registers' worth. In this case r3 might be used in the body of the function, but
5051 it is not being used to convey an argument into the function. In theory
5052 we could just check crtl->args.size to see how many bytes are
5053 being passed in argument registers, but it seems that it is unreliable.
5054 Sometimes it will have the value 0 when in fact arguments are being
5055 passed. (See testcase execute/20021111-1.c for an example). So we also
5056 check the args_info.nregs field. The problem with this field is
5057 that it makes no allowances for arguments that are passed to the
5058 function but which are not used. Hence we could miss an opportunity
5059 when a function has an unused argument in r3. But it is better to be
5060 safe than sorry. */
5061 if (! cfun->machine->uses_anonymous_args
5062 && crtl->args.size >= 0
5063 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5064 && crtl->args.info.nregs < 4)
5065 return LAST_ARG_REGNUM;
5067 /* Otherwise look for a call-saved register that is going to be pushed. */
5068 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5069 if (pushed_regs_mask & (1 << reg))
5074 /* Thumb-2 can use high regs. */
5075 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5076 if (pushed_regs_mask & (1 << reg))
5079 /* Something went wrong - thumb_compute_save_reg_mask()
5080 should have arranged for a suitable register to be pushed. */
5084 static GTY(()) int pic_labelno;
5086 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5087 low register. */
5090 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5092 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5094 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5097 gcc_assert (flag_pic);
5099 pic_reg = cfun->machine->pic_reg;
5100 if (TARGET_VXWORKS_RTP)
5102 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5103 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5104 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5106 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5108 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5109 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5113 /* We use an UNSPEC rather than a LABEL_REF because this label
5114 never appears in the code stream. */
5116 labelno = GEN_INT (pic_labelno++);
5117 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5118 l1 = gen_rtx_CONST (VOIDmode, l1);
5120 /* On the ARM the PC register contains 'dot + 8' at the time of the
5121 addition; on the Thumb it is 'dot + 4'. */
5122 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5123 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5125 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5129 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5131 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5133 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5135 else /* TARGET_THUMB1 */
5137 if (arm_pic_register != INVALID_REGNUM
5138 && REGNO (pic_reg) > LAST_LO_REGNUM)
5140 /* We will have pushed the pic register, so we should always be
5141 able to find a work register. */
5142 pic_tmp = gen_rtx_REG (SImode,
5143 thumb_find_work_register (saved_regs));
5144 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5145 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5148 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5149 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5153 /* Need to emit this whether or not we obey regdecls,
5154 since setjmp/longjmp can cause life info to screw up. */
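/* For reference, a sketch of the ARM-state sequence emitted above for
the non-VxWorks case (labels and the register are illustrative, not
taken from real compiler output):

ldr     r4, .LCP       @ pic_load_addr_32bit
.LPIC0:
add     r4, pc, r4     @ pic_add_dot_plus_eight
...
.LCP:
.word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

The '+ 8' term is the 'dot + 8' PC bias noted below; Thumb uses
'+ 4' instead. */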
5158 /* Generate code to load the address of a static var when flag_pic is set. */
5160 arm_pic_static_addr (rtx orig, rtx reg)
5162 rtx l1, labelno, offset_rtx, insn;
5164 gcc_assert (flag_pic);
5166 /* We use an UNSPEC rather than a LABEL_REF because this label
5167 never appears in the code stream. */
5168 labelno = GEN_INT (pic_labelno++);
5169 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5170 l1 = gen_rtx_CONST (VOIDmode, l1);
5172 /* On the ARM the PC register contains 'dot + 8' at the time of the
5173 addition; on the Thumb it is 'dot + 4'. */
5174 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5175 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5176 UNSPEC_SYMBOL_OFFSET);
5177 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5181 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5183 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5185 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5187 else /* TARGET_THUMB1 */
5189 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5190 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5196 /* Return nonzero if X is valid as an ARM state addressing register. */
5198 arm_address_register_rtx_p (rtx x, int strict_p)
5202 if (GET_CODE (x) != REG)
5208 return ARM_REGNO_OK_FOR_BASE_P (regno);
5210 return (regno <= LAST_ARM_REGNUM
5211 || regno >= FIRST_PSEUDO_REGISTER
5212 || regno == FRAME_POINTER_REGNUM
5213 || regno == ARG_POINTER_REGNUM);
5216 /* Return TRUE if this rtx is the difference of a symbol and a label,
5217 and will reduce to a PC-relative relocation in the object file.
5218 Expressions like this can be left alone when generating PIC, rather
5219 than forced through the GOT. */
5221 pcrel_constant_p (rtx x)
5223 if (GET_CODE (x) == MINUS)
5224 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5229 /* Return true if X will surely end up in an index register after next
5230 splitting pass. */
5232 will_be_in_index_register (const_rtx x)
5234 /* arm.md: calculate_pic_address will split this into a register. */
5235 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5238 /* Return nonzero if X is a valid ARM state address operand. */
5240 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5244 enum rtx_code code = GET_CODE (x);
5246 if (arm_address_register_rtx_p (x, strict_p))
5249 use_ldrd = (TARGET_LDRD
5251 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5253 if (code == POST_INC || code == PRE_DEC
5254 || ((code == PRE_INC || code == POST_DEC)
5255 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5256 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5258 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5259 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5260 && GET_CODE (XEXP (x, 1)) == PLUS
5261 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5263 rtx addend = XEXP (XEXP (x, 1), 1);
5265 /* Don't allow ldrd post increment by register because it's hard
5266 to fix up invalid register choices. */
5268 && GET_CODE (x) == POST_MODIFY
5269 && GET_CODE (addend) == REG)
5272 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5273 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5276 /* After reload, constants split into minipools will have addresses
5277 from a LABEL_REF. */
5278 else if (reload_completed
5279 && (code == LABEL_REF
5281 && GET_CODE (XEXP (x, 0)) == PLUS
5282 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5283 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5286 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5289 else if (code == PLUS)
5291 rtx xop0 = XEXP (x, 0);
5292 rtx xop1 = XEXP (x, 1);
5294 return ((arm_address_register_rtx_p (xop0, strict_p)
5295 && ((GET_CODE(xop1) == CONST_INT
5296 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5297 || (!strict_p && will_be_in_index_register (xop1))))
5298 || (arm_address_register_rtx_p (xop1, strict_p)
5299 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5303 /* Reload currently can't handle MINUS, so disable this for now */
5304 else if (GET_CODE (x) == MINUS)
5306 rtx xop0 = XEXP (x, 0);
5307 rtx xop1 = XEXP (x, 1);
5309 return (arm_address_register_rtx_p (xop0, strict_p)
5310 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5314 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5315 && code == SYMBOL_REF
5316 && CONSTANT_POOL_ADDRESS_P (x)
5318 && symbol_mentioned_p (get_pool_constant (x))
5319 && ! pcrel_constant_p (get_pool_constant (x))))
5325 /* Return nonzero if X is a valid Thumb-2 address operand. */
5327 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5330 enum rtx_code code = GET_CODE (x);
5332 if (arm_address_register_rtx_p (x, strict_p))
5335 use_ldrd = (TARGET_LDRD
5337 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5339 if (code == POST_INC || code == PRE_DEC
5340 || ((code == PRE_INC || code == POST_DEC)
5341 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5342 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5344 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5345 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5346 && GET_CODE (XEXP (x, 1)) == PLUS
5347 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5349 /* Thumb-2 only has autoincrement by constant. */
5350 rtx addend = XEXP (XEXP (x, 1), 1);
5351 HOST_WIDE_INT offset;
5353 if (GET_CODE (addend) != CONST_INT)
5356 offset = INTVAL(addend);
5357 if (GET_MODE_SIZE (mode) <= 4)
5358 return (offset > -256 && offset < 256);
5360 return (use_ldrd && offset > -1024 && offset < 1024
5361 && (offset & 3) == 0);
5364 /* After reload, constants split into minipools will have addresses
5365 from a LABEL_REF. */
5366 else if (reload_completed
5367 && (code == LABEL_REF
5369 && GET_CODE (XEXP (x, 0)) == PLUS
5370 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5371 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5374 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5377 else if (code == PLUS)
5379 rtx xop0 = XEXP (x, 0);
5380 rtx xop1 = XEXP (x, 1);
5382 return ((arm_address_register_rtx_p (xop0, strict_p)
5383 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5384 || (!strict_p && will_be_in_index_register (xop1))))
5385 || (arm_address_register_rtx_p (xop1, strict_p)
5386 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5389 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5390 && code == SYMBOL_REF
5391 && CONSTANT_POOL_ADDRESS_P (x)
5393 && symbol_mentioned_p (get_pool_constant (x))
5394 && ! pcrel_constant_p (get_pool_constant (x))))
5400 /* Return nonzero if INDEX is valid for an address index operand in
5403 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5406 HOST_WIDE_INT range;
5407 enum rtx_code code = GET_CODE (index);
5409 /* Standard coprocessor addressing modes. */
5410 if (TARGET_HARD_FLOAT
5411 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5412 && (mode == SFmode || mode == DFmode
5413 || (TARGET_MAVERICK && mode == DImode)))
5414 return (code == CONST_INT && INTVAL (index) < 1024
5415 && INTVAL (index) > -1024
5416 && (INTVAL (index) & 3) == 0);
5418 /* For quad modes, we restrict the constant offset to be slightly less
5419 than what the instruction format permits. We do this because for
5420 quad mode moves, we will actually decompose them into two separate
5421 double-mode reads or writes. INDEX must therefore be a valid
5422 (double-mode) offset and so should INDEX+8. */
5423 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5424 return (code == CONST_INT
5425 && INTVAL (index) < 1016
5426 && INTVAL (index) > -1024
5427 && (INTVAL (index) & 3) == 0);
5429 /* We have no such constraint on double mode offsets, so we permit the
5430 full range of the instruction format. */
5431 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5432 return (code == CONST_INT
5433 && INTVAL (index) < 1024
5434 && INTVAL (index) > -1024
5435 && (INTVAL (index) & 3) == 0);
5437 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5438 return (code == CONST_INT
5439 && INTVAL (index) < 1024
5440 && INTVAL (index) > -1024
5441 && (INTVAL (index) & 3) == 0);
5443 if (arm_address_register_rtx_p (index, strict_p)
5444 && (GET_MODE_SIZE (mode) <= 4))
5447 if (mode == DImode || mode == DFmode)
5449 if (code == CONST_INT)
5451 HOST_WIDE_INT val = INTVAL (index);
5454 return val > -256 && val < 256;
5456 return val > -4096 && val < 4092;
5459 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5462 if (GET_MODE_SIZE (mode) <= 4
5466 || (mode == QImode && outer == SIGN_EXTEND))))
5470 rtx xiop0 = XEXP (index, 0);
5471 rtx xiop1 = XEXP (index, 1);
5473 return ((arm_address_register_rtx_p (xiop0, strict_p)
5474 && power_of_two_operand (xiop1, SImode))
5475 || (arm_address_register_rtx_p (xiop1, strict_p)
5476 && power_of_two_operand (xiop0, SImode)));
5478 else if (code == LSHIFTRT || code == ASHIFTRT
5479 || code == ASHIFT || code == ROTATERT)
5481 rtx op = XEXP (index, 1);
5483 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5484 && GET_CODE (op) == CONST_INT
5486 && INTVAL (op) <= 31);
5490 /* For ARM v4 we may be doing a sign-extend operation during the
5491 load. */
5496 || (outer == SIGN_EXTEND && mode == QImode))
5502 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5504 return (code == CONST_INT
5505 && INTVAL (index) < range
5506 && INTVAL (index) > -range);
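/* For example (a gloss on the ranges above): with arm_arch4 an SImode
index may lie anywhere in (-4096, 4096), matching ldr's 12-bit offset
field, while HImode is confined to (-256, 256) by ldrh's 8-bit field;
before v4, HImode loads are synthesized from word and byte loads and
so get the nearly full (-4095, 4095) range. */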
5509 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5510 index operand, i.e. 1, 2, 4 or 8. */
5512 thumb2_index_mul_operand (rtx op)
5516 if (GET_CODE(op) != CONST_INT)
5520 return (val == 1 || val == 2 || val == 4 || val == 8);
5523 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5525 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5527 enum rtx_code code = GET_CODE (index);
5529 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5530 /* Standard coprocessor addressing modes. */
5531 if (TARGET_HARD_FLOAT
5532 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5533 && (mode == SFmode || mode == DFmode
5534 || (TARGET_MAVERICK && mode == DImode)))
5535 return (code == CONST_INT && INTVAL (index) < 1024
5536 /* Thumb-2 allows only > -256 index range for its core register
5537 load/stores. Since we allow SF/DF in core registers, we have
5538 to use the intersection between -256~4096 (core) and -1024~1024
5539 (coprocessor). */
5540 && INTVAL (index) > -256
5541 && (INTVAL (index) & 3) == 0);
5543 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5545 /* For DImode assume values will usually live in core regs
5546 and only allow LDRD addressing modes. */
5547 if (!TARGET_LDRD || mode != DImode)
5548 return (code == CONST_INT
5549 && INTVAL (index) < 1024
5550 && INTVAL (index) > -1024
5551 && (INTVAL (index) & 3) == 0);
5554 /* For quad modes, we restrict the constant offset to be slightly less
5555 than what the instruction format permits. We do this because for
5556 quad mode moves, we will actually decompose them into two separate
5557 double-mode reads or writes. INDEX must therefore be a valid
5558 (double-mode) offset and so should INDEX+8. */
5559 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5560 return (code == CONST_INT
5561 && INTVAL (index) < 1016
5562 && INTVAL (index) > -1024
5563 && (INTVAL (index) & 3) == 0);
5565 /* We have no such constraint on double mode offsets, so we permit the
5566 full range of the instruction format. */
5567 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5568 return (code == CONST_INT
5569 && INTVAL (index) < 1024
5570 && INTVAL (index) > -1024
5571 && (INTVAL (index) & 3) == 0);
5573 if (arm_address_register_rtx_p (index, strict_p)
5574 && (GET_MODE_SIZE (mode) <= 4))
5577 if (mode == DImode || mode == DFmode)
5579 if (code == CONST_INT)
5581 HOST_WIDE_INT val = INTVAL (index);
5582 /* ??? Can we assume ldrd for thumb2? */
5583 /* Thumb-2 ldrd only has reg+const addressing modes. */
5584 /* ldrd supports offsets of +-1020.
5585 However, the ldr fallback does not. */
5586 return val > -256 && val < 256 && (val & 3) == 0;
5594 rtx xiop0 = XEXP (index, 0);
5595 rtx xiop1 = XEXP (index, 1);
5597 return ((arm_address_register_rtx_p (xiop0, strict_p)
5598 && thumb2_index_mul_operand (xiop1))
5599 || (arm_address_register_rtx_p (xiop1, strict_p)
5600 && thumb2_index_mul_operand (xiop0)));
5602 else if (code == ASHIFT)
5604 rtx op = XEXP (index, 1);
5606 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5607 && GET_CODE (op) == CONST_INT
5609 && INTVAL (op) <= 3);
5612 return (code == CONST_INT
5613 && INTVAL (index) < 4096
5614 && INTVAL (index) > -256);
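/* The asymmetric bound above mirrors the Thumb-2 encodings: a
positive offset has a 12-bit immediate form (up to 4095), while a
negative offset only has the 8-bit form (down to -255). */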
5617 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5619 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5623 if (GET_CODE (x) != REG)
5629 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5631 return (regno <= LAST_LO_REGNUM
5632 || regno > LAST_VIRTUAL_REGISTER
5633 || regno == FRAME_POINTER_REGNUM
5634 || (GET_MODE_SIZE (mode) >= 4
5635 && (regno == STACK_POINTER_REGNUM
5636 || regno >= FIRST_PSEUDO_REGISTER
5637 || x == hard_frame_pointer_rtx
5638 || x == arg_pointer_rtx)));
5641 /* Return nonzero if x is a legitimate index register. This is the case
5642 for any base register that can access a QImode object. */
5644 thumb1_index_register_rtx_p (rtx x, int strict_p)
5646 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5649 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5651 The AP may be eliminated to either the SP or the FP, so we use the
5652 least common denominator, e.g. SImode, and offsets from 0 to 64.
5654 ??? Verify whether the above is the right approach.
5656 ??? Also, the FP may be eliminated to the SP, so perhaps that
5657 needs special handling also.
5659 ??? Look at how the mips16 port solves this problem. It probably uses
5660 better ways to solve some of these problems.
5662 Although it is not incorrect, we don't accept QImode and HImode
5663 addresses based on the frame pointer or arg pointer until the
5664 reload pass starts. This is so that eliminating such addresses
5665 into stack based ones won't produce impossible code. */
5667 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5669 /* ??? Not clear if this is right. Experiment. */
5670 if (GET_MODE_SIZE (mode) < 4
5671 && !(reload_in_progress || reload_completed)
5672 && (reg_mentioned_p (frame_pointer_rtx, x)
5673 || reg_mentioned_p (arg_pointer_rtx, x)
5674 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5675 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5676 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5677 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5680 /* Accept any base register. SP only in SImode or larger. */
5681 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5684 /* This is PC relative data before arm_reorg runs. */
5685 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5686 && GET_CODE (x) == SYMBOL_REF
5687 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5690 /* This is PC relative data after arm_reorg runs. */
5691 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5693 && (GET_CODE (x) == LABEL_REF
5694 || (GET_CODE (x) == CONST
5695 && GET_CODE (XEXP (x, 0)) == PLUS
5696 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5697 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5700 /* Post-inc indexing only supported for SImode and larger. */
5701 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5702 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5705 else if (GET_CODE (x) == PLUS)
5707 /* REG+REG address can be any two index registers. */
5708 /* We disallow FRAME+REG addressing since we know that FRAME
5709 will be replaced with STACK, and SP relative addressing only
5710 permits SP+OFFSET. */
5711 if (GET_MODE_SIZE (mode) <= 4
5712 && XEXP (x, 0) != frame_pointer_rtx
5713 && XEXP (x, 1) != frame_pointer_rtx
5714 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5715 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5716 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5719 /* REG+const has 5-7 bit offset for non-SP registers. */
5720 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5721 || XEXP (x, 0) == arg_pointer_rtx)
5722 && GET_CODE (XEXP (x, 1)) == CONST_INT
5723 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5726 /* REG+const has 10-bit offset for SP, but only SImode and
5727 larger is supported. */
5728 /* ??? Should probably check for DI/DFmode overflow here
5729 just like GO_IF_LEGITIMATE_OFFSET does. */
5730 else if (GET_CODE (XEXP (x, 0)) == REG
5731 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5732 && GET_MODE_SIZE (mode) >= 4
5733 && GET_CODE (XEXP (x, 1)) == CONST_INT
5734 && INTVAL (XEXP (x, 1)) >= 0
5735 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5736 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5739 else if (GET_CODE (XEXP (x, 0)) == REG
5740 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5741 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5742 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5743 && REGNO (XEXP (x, 0))
5744 <= LAST_VIRTUAL_POINTER_REGISTER))
5745 && GET_MODE_SIZE (mode) >= 4
5746 && GET_CODE (XEXP (x, 1)) == CONST_INT
5747 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5751 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5752 && GET_MODE_SIZE (mode) == 4
5753 && GET_CODE (x) == SYMBOL_REF
5754 && CONSTANT_POOL_ADDRESS_P (x)
5756 && symbol_mentioned_p (get_pool_constant (x))
5757 && ! pcrel_constant_p (get_pool_constant (x))))
5763 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5764 instruction of mode MODE. */
5766 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5768 switch (GET_MODE_SIZE (mode))
5771 return val >= 0 && val < 32;
5774 return val >= 0 && val < 64 && (val & 1) == 0;
5778 && (val + GET_MODE_SIZE (mode)) <= 128
5779 && (val & 3) == 0);
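/* So, concretely: a QImode access may use offsets 0..31, an HImode
access the even offsets 0..62, and an SImode or larger access the
multiples of 4 from 0 up to 128 minus the mode size (0..124 for a
word load). */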
5784 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5787 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5788 else if (TARGET_THUMB2)
5789 return thumb2_legitimate_address_p (mode, x, strict_p);
5790 else /* if (TARGET_THUMB1) */
5791 return thumb1_legitimate_address_p (mode, x, strict_p);
5794 /* Build the SYMBOL_REF for __tls_get_addr. */
5796 static GTY(()) rtx tls_get_addr_libfunc;
5799 get_tls_get_addr (void)
5801 if (!tls_get_addr_libfunc)
5802 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5803 return tls_get_addr_libfunc;
5807 arm_load_tp (rtx target)
5810 target = gen_reg_rtx (SImode);
5814 /* Can return in any reg. */
5815 emit_insn (gen_load_tp_hard (target));
5819 /* Always returned in r0. Immediately copy the result into a pseudo,
5820 otherwise other uses of r0 (e.g. setting up function arguments) may
5821 clobber the value. */
5825 emit_insn (gen_load_tp_soft ());
5827 tmp = gen_rtx_REG (SImode, 0);
5828 emit_move_insn (target, tmp);
5834 load_tls_operand (rtx x, rtx reg)
5838 if (reg == NULL_RTX)
5839 reg = gen_reg_rtx (SImode);
5841 tmp = gen_rtx_CONST (SImode, x);
5843 emit_move_insn (reg, tmp);
5849 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5851 rtx insns, label, labelno, sum;
5855 labelno = GEN_INT (pic_labelno++);
5856 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5857 label = gen_rtx_CONST (VOIDmode, label);
5859 sum = gen_rtx_UNSPEC (Pmode,
5860 gen_rtvec (4, x, GEN_INT (reloc), label,
5861 GEN_INT (TARGET_ARM ? 8 : 4)),
5863 reg = load_tls_operand (sum, reg);
5866 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5867 else if (TARGET_THUMB2)
5868 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5869 else /* TARGET_THUMB1 */
5870 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5872 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5873 Pmode, 1, reg, Pmode);
5875 insns = get_insns ();
5882 legitimize_tls_address (rtx x, rtx reg)
5884 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5885 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5889 case TLS_MODEL_GLOBAL_DYNAMIC:
5890 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5891 dest = gen_reg_rtx (Pmode);
5892 emit_libcall_block (insns, dest, ret, x);
5895 case TLS_MODEL_LOCAL_DYNAMIC:
5896 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5898 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5899 share the LDM result with other LD model accesses. */
5900 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5902 dest = gen_reg_rtx (Pmode);
5903 emit_libcall_block (insns, dest, ret, eqv);
5905 /* Load the addend. */
5906 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5908 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5909 return gen_rtx_PLUS (Pmode, dest, addend);
5911 case TLS_MODEL_INITIAL_EXEC:
5912 labelno = GEN_INT (pic_labelno++);
5913 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5914 label = gen_rtx_CONST (VOIDmode, label);
5915 sum = gen_rtx_UNSPEC (Pmode,
5916 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5917 GEN_INT (TARGET_ARM ? 8 : 4)),
5919 reg = load_tls_operand (sum, reg);
5922 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5923 else if (TARGET_THUMB2)
5924 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5927 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5928 emit_move_insn (reg, gen_const_mem (SImode, reg));
5931 tp = arm_load_tp (NULL_RTX);
5933 return gen_rtx_PLUS (Pmode, tp, reg);
5935 case TLS_MODEL_LOCAL_EXEC:
5936 tp = arm_load_tp (NULL_RTX);
5938 reg = gen_rtx_UNSPEC (Pmode,
5939 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5941 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5943 return gen_rtx_PLUS (Pmode, tp, reg);
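/* In summary (an illustrative gloss on the cases above): the dynamic
models call __tls_get_addr at run time to obtain the address; the
initial-exec model adds a GOT-loaded offset to the thread pointer; and
the local-exec model adds a link-time constant offset to the thread
pointer. */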
5950 /* Try machine-dependent ways of modifying an illegitimate address
5951 to be legitimate. If we find one, return the new, valid address. */
5953 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5957 /* TODO: legitimize_address for Thumb2. */
5960 return thumb_legitimize_address (x, orig_x, mode);
5963 if (arm_tls_symbol_p (x))
5964 return legitimize_tls_address (x, NULL_RTX);
5966 if (GET_CODE (x) == PLUS)
5968 rtx xop0 = XEXP (x, 0);
5969 rtx xop1 = XEXP (x, 1);
5971 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5972 xop0 = force_reg (SImode, xop0);
5974 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5975 xop1 = force_reg (SImode, xop1);
5977 if (ARM_BASE_REGISTER_RTX_P (xop0)
5978 && GET_CODE (xop1) == CONST_INT)
5980 HOST_WIDE_INT n, low_n;
5984 /* VFP addressing modes actually allow greater offsets, but for
5985 now we just stick with the lowest common denominator. */
5987 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5999 low_n = ((mode) == TImode ? 0
6000 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6004 base_reg = gen_reg_rtx (SImode);
6005 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6006 emit_move_insn (base_reg, val);
6007 x = plus_constant (base_reg, low_n);
6009 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6010 x = gen_rtx_PLUS (SImode, xop0, xop1);
6013 /* XXX We don't allow MINUS any more -- see comment in
6014 arm_legitimate_address_outer_p (). */
6015 else if (GET_CODE (x) == MINUS)
6017 rtx xop0 = XEXP (x, 0);
6018 rtx xop1 = XEXP (x, 1);
6020 if (CONSTANT_P (xop0))
6021 xop0 = force_reg (SImode, xop0);
6023 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6024 xop1 = force_reg (SImode, xop1);
6026 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6027 x = gen_rtx_MINUS (SImode, xop0, xop1);
6030 /* Make sure to take full advantage of the pre-indexed addressing mode
6031 with absolute addresses which often allows for the base register to
6032 be factorized for multiple adjacent memory references, and it might
6033 even allow for the minipool to be avoided entirely. */
6034 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6037 HOST_WIDE_INT mask, base, index;
6040 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6041 use an 8-bit index. So let's use a 12-bit index for SImode only and
6042 hope that arm_gen_constant will enable ldrb to use more bits. */
6043 bits = (mode == SImode) ? 12 : 8;
6044 mask = (1 << bits) - 1;
6045 base = INTVAL (x) & ~mask;
6046 index = INTVAL (x) & mask;
6047 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6049 /* It'll most probably be more efficient to generate the base
6050 with more bits set and use a negative index instead. */
6054 base_reg = force_reg (SImode, GEN_INT (base));
6055 x = plus_constant (base_reg, index);
6060 /* We need to find and carefully transform any SYMBOL and LABEL
6061 references, so go back to the original address expression. */
6062 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6064 if (new_x != orig_x)
6072 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6073 to be legitimate. If we find one, return the new, valid address. */
6075 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6077 if (arm_tls_symbol_p (x))
6078 return legitimize_tls_address (x, NULL_RTX);
6080 if (GET_CODE (x) == PLUS
6081 && GET_CODE (XEXP (x, 1)) == CONST_INT
6082 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6083 || INTVAL (XEXP (x, 1)) < 0))
6085 rtx xop0 = XEXP (x, 0);
6086 rtx xop1 = XEXP (x, 1);
6087 HOST_WIDE_INT offset = INTVAL (xop1);
6089 /* Try to fold the offset into a biasing of the base register and
6090 then offsetting that. Don't do this when optimizing for space
6091 since it can cause too many CSEs. */
6092 if (optimize_size && offset >= 0
6093 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6095 HOST_WIDE_INT delta;
6098 delta = offset - (256 - GET_MODE_SIZE (mode));
6099 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6100 delta = 31 * GET_MODE_SIZE (mode);
6102 delta = offset & (~31 * GET_MODE_SIZE (mode));
6104 xop0 = force_operand (plus_constant (xop0, offset - delta),
6106 x = plus_constant (xop0, delta);
6108 else if (offset < 0 && offset > -256)
6109 /* Small negative offsets are best done with a subtract before the
6110 dereference; forcing these into a register normally takes two
6111 instructions. */
6112 x = force_operand (x, NULL_RTX);
6115 /* For the remaining cases, force the constant into a register. */
6116 xop1 = force_reg (SImode, xop1);
6117 x = gen_rtx_PLUS (SImode, xop0, xop1);
6120 else if (GET_CODE (x) == PLUS
6121 && s_register_operand (XEXP (x, 1), SImode)
6122 && !s_register_operand (XEXP (x, 0), SImode))
6124 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6126 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6131 /* We need to find and carefully transform any SYMBOL and LABEL
6132 references, so go back to the original address expression. */
6133 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6135 if (new_x != orig_x)
6143 arm_legitimize_reload_address (rtx *p,
6144 enum machine_mode mode,
6145 int opnum, int type,
6146 int ind_levels ATTRIBUTE_UNUSED)
6148 if (GET_CODE (*p) == PLUS
6149 && GET_CODE (XEXP (*p, 0)) == REG
6150 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6151 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6153 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6154 HOST_WIDE_INT low, high;
6156 /* Detect coprocessor load/stores. */
6157 bool coproc_p = ((TARGET_HARD_FLOAT
6158 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6159 && (mode == SFmode || mode == DFmode
6160 || (mode == DImode && TARGET_MAVERICK)))
6161 || (TARGET_REALLY_IWMMXT
6162 && VALID_IWMMXT_REG_MODE (mode))
6164 && (VALID_NEON_DREG_MODE (mode)
6165 || VALID_NEON_QREG_MODE (mode))));
6167 /* For some conditions, bail out when lower two bits are unaligned. */
6168 if ((val & 0x3) != 0
6169 /* Coprocessor load/store indexes are 8 bits + '00' appended. */
6171 /* For DI, and DF under soft-float: */
6172 || ((mode == DImode || mode == DFmode)
6173 /* Without ldrd, we use stm/ldm, which does not
6174 fare well with unaligned bits. */
6176 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6177 || TARGET_THUMB2))))
6180 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6181 of which the (reg+high) gets turned into a reload add insn,
6182 we try to decompose the index into high/low values that can often
6183 also lead to better reload CSE.
6185 ldr r0, [r2, #4100] // Offset too large
6186 ldr r1, [r2, #4104] // Offset too large
6188 is best reloaded as:
6194 which post-reload CSE can simplify in most cases to eliminate the
6195 second add instruction:
6200 The idea here is that we want to split out the bits of the constant
6201 as a mask, rather than as subtracting the maximum offset that the
6202 respective type of load/store used can handle.
6204 When encountering negative offsets, we can still utilize the trick even if
6205 the overall offset is positive; sometimes this may lead to an immediate
6206 that can be constructed with fewer instructions.
6208 ldr r0, [r2, #0x3FFFFC]
6210 This is best reloaded as:
6211 add t1, r2, #0x400000
6214 The trick for spotting this for a load insn with N bits of offset
6215 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6216 negative offset that is going to make bit N and all the bits below
6217 it become zero in the remainder part.
6219 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6220 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6221 used in most cases of ARM load/store instructions. */
6223 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6224 (((VAL) & ((1 << (N)) - 1)) \
6225 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6226 : 0)
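/* A worked example of the macro, using the 0x3FFFFC load above with
N = 12: 0x3FFFFC & 0xFFF = 0xFFC is nonzero, so
low = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
    = (0x1FFC ^ 0x1000) - 0x1000 = 0xFFC - 0x1000 = -4,
leaving high = val - low = 0x400000, which is encodable as a single
add immediate. */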
6230 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6232 /* NEON quad-word load/stores are made of two double-word accesses,
6233 so the valid index range is reduced by 8. Treat as 9-bit range if
6234 we go over it. */
6235 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6236 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6238 else if (GET_MODE_SIZE (mode) == 8)
6241 low = (TARGET_THUMB2
6242 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6243 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6245 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6246 to access doublewords. The supported load/store offsets are
6247 -8, -4, and 4, which we try to produce here. */
6248 low = ((val & 0xf) ^ 0x8) - 0x8;
6250 else if (GET_MODE_SIZE (mode) < 8)
6252 /* NEON element load/stores do not have an offset. */
6253 if (TARGET_NEON_FP16 && mode == HFmode)
6258 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6259 Try the wider 12-bit range first, and re-try if the result
6260 is out of range. */
6261 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6262 if (low < -255)
6263 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6267 if (mode == HImode || mode == HFmode)
6270 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6273 /* The storehi/movhi_bytes fallbacks can use only
6274 [-4094,+4094] of the full ldrb/strb index range. */
6275 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6276 if (low == 4095 || low == -4095)
6281 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6287 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6288 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6289 - (unsigned HOST_WIDE_INT) 0x80000000);
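/* Clarifying note (added): the AND/XOR/subtract sequence above is the
   usual idiom for sign-extending bit 31 of (val - low) on hosts where
   HOST_WIDE_INT is wider than 32 bits.  For example, if val - low is
   0x80000004, then (0x80000004 ^ 0x80000000) - 0x80000000 == -0x7ffffffc,
   which is the wrapped value a 32-bit target would compute.  */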
6290 /* Check for overflow or zero. */
6291 if (low == 0 || high == 0 || (high + low != val))
6294 /* Reload the high part into a base reg; leave the low part
6295 in the mem. */
6296 *p = gen_rtx_PLUS (GET_MODE (*p),
6297 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6298 GEN_INT (high)),
6299 GEN_INT (low));
6300 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6301 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6302 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6310 thumb_legitimize_reload_address (rtx *x_p,
6311 enum machine_mode mode,
6312 int opnum, int type,
6313 int ind_levels ATTRIBUTE_UNUSED)
6317 if (GET_CODE (x) == PLUS
6318 && GET_MODE_SIZE (mode) < 4
6319 && REG_P (XEXP (x, 0))
6320 && XEXP (x, 0) == stack_pointer_rtx
6321 && GET_CODE (XEXP (x, 1)) == CONST_INT
6322 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6327 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6328 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6332 /* If both registers are hi-regs, then it's better to reload the
6333 entire expression rather than each register individually. That
6334 only requires one reload register rather than two. */
6335 if (GET_CODE (x) == PLUS
6336 && REG_P (XEXP (x, 0))
6337 && REG_P (XEXP (x, 1))
6338 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6339 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6344 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6345 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
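/* Illustrative example (added; not in the original source): for an
   address such as (plus (reg:SI r8) (reg:SI r9)), both registers are
   hi-regs, so pushing one reload for the whole PLUS uses a single low
   register, whereas reloading r8 and r9 individually would need two.  */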
6352 /* Test for various thread-local symbols. */
6354 /* Return TRUE if X is a thread-local symbol. */
6357 arm_tls_symbol_p (rtx x)
6359 if (! TARGET_HAVE_TLS)
6362 if (GET_CODE (x) != SYMBOL_REF)
6365 return SYMBOL_REF_TLS_MODEL (x) != 0;
6368 /* Helper for arm_tls_referenced_p. */
6371 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6373 if (GET_CODE (*x) == SYMBOL_REF)
6374 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6376 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6377 TLS offsets, not real symbol references. */
6378 if (GET_CODE (*x) == UNSPEC
6379 && XINT (*x, 1) == UNSPEC_TLS)
6385 /* Return TRUE if X contains any TLS symbol references. */
6388 arm_tls_referenced_p (rtx x)
6390 if (! TARGET_HAVE_TLS)
6393 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
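/* Example (added for illustration): a bare (symbol_ref "x") whose
   SYMBOL_REF_TLS_MODEL is nonzero makes this function return true,
   whereas (unspec [(symbol_ref "x")] UNSPEC_TLS) does not, because the
   callback above skips the contents of UNSPEC_TLS.  */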
6396 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6398 On the ARM, allow any integer (invalid ones are removed later by insn
6399 patterns), nice doubles, and symbol_refs which refer to the function's
6400 constant pool.
6402 When generating PIC, allow anything. */
6405 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6407 /* At present, we have no support for Neon structure constants, so forbid
6408 them here. It might be possible to handle simple cases like 0 and -1
6409 in future. */
6410 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6413 return flag_pic || !label_mentioned_p (x);
6417 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6419 return (GET_CODE (x) == CONST_INT
6420 || GET_CODE (x) == CONST_DOUBLE
6421 || CONSTANT_ADDRESS_P (x)
6422 || flag_pic);
6426 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6428 return (!arm_cannot_force_const_mem (mode, x)
6429 && (TARGET_32BIT
6430 ? arm_legitimate_constant_p_1 (mode, x)
6431 : thumb_legitimate_constant_p (mode, x)));
6434 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6437 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6441 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6443 split_const (x, &base, &offset);
6444 if (GET_CODE (base) == SYMBOL_REF
6445 && !offset_within_block_p (base, INTVAL (offset)))
6448 return arm_tls_referenced_p (x);
6451 #define REG_OR_SUBREG_REG(X) \
6452 (GET_CODE (X) == REG \
6453 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6455 #define REG_OR_SUBREG_RTX(X) \
6456 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
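/* Examples (added): REG_OR_SUBREG_REG accepts both (reg:SI r1) and
   (subreg:SI (reg:DI r0) 4); REG_OR_SUBREG_RTX yields (reg:SI r1) for
   the former and the inner (reg:DI r0) for the latter.  */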
6459 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6461 enum machine_mode mode = GET_MODE (x);
6475 return COSTS_N_INSNS (1);
6478 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6481 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6488 return COSTS_N_INSNS (2) + cycles;
6490 return COSTS_N_INSNS (1) + 16;
6493 return (COSTS_N_INSNS (1)
6494 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6495 + (GET_CODE (SET_DEST (x)) == MEM)));
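/* Worked example (added for clarity): under the formula above, a
   reg-to-reg SET costs COSTS_N_INSNS (1); a load or a store adds 4;
   a hypothetical MEM-to-MEM move would add 8.  */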
6500 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6502 if (thumb_shiftable_const (INTVAL (x)))
6503 return COSTS_N_INSNS (2);
6504 return COSTS_N_INSNS (3);
6506 else if ((outer == PLUS || outer == COMPARE)
6507 && INTVAL (x) < 256 && INTVAL (x) > -256)
6509 else if ((outer == IOR || outer == XOR || outer == AND)
6510 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6511 return COSTS_N_INSNS (1);
6512 else if (outer == AND)
6515 /* This duplicates the tests in the andsi3 expander. */
6516 for (i = 9; i <= 31; i++)
6517 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6518 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6519 return COSTS_N_INSNS (2);
6521 else if (outer == ASHIFT || outer == ASHIFTRT
6522 || outer == LSHIFTRT)
6524 return COSTS_N_INSNS (2);
6530 return COSTS_N_INSNS (3);
6548 /* XXX another guess. */
6549 /* Memory costs quite a lot for the first word, but subsequent words
6550 load at the equivalent of a single insn each. */
6551 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6552 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6553 ? 4 : 0));
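/* Worked example (added; assumes a 32-bit target): a DImode load spans
   two words, so the expression above gives 10 + 4 * 1 == 14, plus a
   further 4 when the address is a constant-pool SYMBOL_REF.  */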
6557 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6563 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6564 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6570 return total + COSTS_N_INSNS (1);
6572 /* Assume a two-shift sequence. Increase the cost slightly so
6573 we prefer actual shifts over an extend operation. */
6574 return total + 1 + COSTS_N_INSNS (2);
6582 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6584 enum machine_mode mode = GET_MODE (x);
6585 enum rtx_code subcode;
6587 enum rtx_code code = GET_CODE (x);
6593 /* Memory costs quite a lot for the first word, but subsequent words
6594 load at the equivalent of a single insn each. */
6595 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6602 if (TARGET_HARD_FLOAT && mode == SFmode)
6603 *total = COSTS_N_INSNS (2);
6604 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6605 *total = COSTS_N_INSNS (4);
6607 *total = COSTS_N_INSNS (20);
6611 if (GET_CODE (XEXP (x, 1)) == REG)
6612 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6613 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6614 *total = rtx_cost (XEXP (x, 1), code, speed);
6620 *total += COSTS_N_INSNS (4);
6625 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6626 *total += rtx_cost (XEXP (x, 0), code, speed);
6629 *total += COSTS_N_INSNS (3);
6633 *total += COSTS_N_INSNS (1);
6634 /* Increase the cost of complex shifts because they aren't any faster,
6635 and they reduce dual issue opportunities. */
6636 if (arm_tune_cortex_a9
6637 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6645 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6646 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6647 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6649 *total += rtx_cost (XEXP (x, 1), code, speed);
6653 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6654 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6656 *total += rtx_cost (XEXP (x, 0), code, speed);
6663 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6665 if (TARGET_HARD_FLOAT
6667 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6669 *total = COSTS_N_INSNS (1);
6670 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6671 && arm_const_double_rtx (XEXP (x, 0)))
6673 *total += rtx_cost (XEXP (x, 1), code, speed);
6677 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6678 && arm_const_double_rtx (XEXP (x, 1)))
6680 *total += rtx_cost (XEXP (x, 0), code, speed);
6686 *total = COSTS_N_INSNS (20);
6690 *total = COSTS_N_INSNS (1);
6691 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6692 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6694 *total += rtx_cost (XEXP (x, 1), code, speed);
6698 subcode = GET_CODE (XEXP (x, 1));
6699 if (subcode == ASHIFT || subcode == ASHIFTRT
6700 || subcode == LSHIFTRT
6701 || subcode == ROTATE || subcode == ROTATERT)
6703 *total += rtx_cost (XEXP (x, 0), code, speed);
6704 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6708 /* A shift as a part of RSB costs no more than RSB itself. */
6709 if (GET_CODE (XEXP (x, 0)) == MULT
6710 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6712 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6713 *total += rtx_cost (XEXP (x, 1), code, speed);
6718 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6720 *total += rtx_cost (XEXP (x, 0), code, speed);
6721 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6725 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6726 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6728 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6729 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6730 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6731 *total += COSTS_N_INSNS (1);
6739 if (code == PLUS && arm_arch6 && mode == SImode
6740 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6741 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6743 *total = COSTS_N_INSNS (1);
6744 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6746 *total += rtx_cost (XEXP (x, 1), code, speed);
6750 /* MLA: All arguments must be registers. We filter out
6751 multiplication by a power of two, so that we fall down into
6752 the code below. */
6753 if (GET_CODE (XEXP (x, 0)) == MULT
6754 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6756 /* The cost comes from the cost of the multiply. */
6760 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6762 if (TARGET_HARD_FLOAT
6764 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6766 *total = COSTS_N_INSNS (1);
6767 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6768 && arm_const_double_rtx (XEXP (x, 1)))
6770 *total += rtx_cost (XEXP (x, 0), code, speed);
6777 *total = COSTS_N_INSNS (20);
6781 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6782 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6784 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6785 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6786 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6787 *total += COSTS_N_INSNS (1);
6793 case AND: case XOR: case IOR:
6795 /* Normally the frame registers will be split into reg+const during
6796 reload, so it is a bad idea to combine them with other instructions,
6797 since then they might not be moved outside of loops. As a compromise
6798 we allow integration with ops that have a constant as their second
6799 operand. */
6800 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6801 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6802 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6803 *total = COSTS_N_INSNS (1);
6807 *total += COSTS_N_INSNS (2);
6808 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6809 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6811 *total += rtx_cost (XEXP (x, 0), code, speed);
6818 *total += COSTS_N_INSNS (1);
6819 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6820 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6822 *total += rtx_cost (XEXP (x, 0), code, speed);
6825 subcode = GET_CODE (XEXP (x, 0));
6826 if (subcode == ASHIFT || subcode == ASHIFTRT
6827 || subcode == LSHIFTRT
6828 || subcode == ROTATE || subcode == ROTATERT)
6830 *total += rtx_cost (XEXP (x, 1), code, speed);
6831 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6836 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6838 *total += rtx_cost (XEXP (x, 1), code, speed);
6839 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6843 if (subcode == UMIN || subcode == UMAX
6844 || subcode == SMIN || subcode == SMAX)
6846 *total = COSTS_N_INSNS (3);
6853 /* This should have been handled by the CPU specific routines. */
6857 if (arm_arch3m && mode == SImode
6858 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6859 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6860 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6861 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6862 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6863 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6865 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6868 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6872 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6874 if (TARGET_HARD_FLOAT
6876 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6878 *total = COSTS_N_INSNS (1);
6881 *total = COSTS_N_INSNS (2);
6887 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6888 if (mode == SImode && code == NOT)
6890 subcode = GET_CODE (XEXP (x, 0));
6891 if (subcode == ASHIFT || subcode == ASHIFTRT
6892 || subcode == LSHIFTRT
6893 || subcode == ROTATE || subcode == ROTATERT
6895 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6897 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6898 /* Register shifts cost an extra cycle. */
6899 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6900 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6909 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6911 *total = COSTS_N_INSNS (4);
6915 operand = XEXP (x, 0);
6917 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6918 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6919 && GET_CODE (XEXP (operand, 0)) == REG
6920 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6921 *total += COSTS_N_INSNS (1);
6922 *total += (rtx_cost (XEXP (x, 1), code, speed)
6923 + rtx_cost (XEXP (x, 2), code, speed));
6927 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6929 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6935 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6936 && mode == SImode && XEXP (x, 1) == const0_rtx)
6938 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6944 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6945 && mode == SImode && XEXP (x, 1) == const0_rtx)
6947 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6967 /* SCC insns. If the comparison has already been performed, they
6968 cost 2 instructions. Otherwise they need an additional comparison
6969 before them. */
6970 *total = COSTS_N_INSNS (2);
6971 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6978 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6984 *total += COSTS_N_INSNS (1);
6985 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6986 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6988 *total += rtx_cost (XEXP (x, 0), code, speed);
6992 subcode = GET_CODE (XEXP (x, 0));
6993 if (subcode == ASHIFT || subcode == ASHIFTRT
6994 || subcode == LSHIFTRT
6995 || subcode == ROTATE || subcode == ROTATERT)
6997 *total += rtx_cost (XEXP (x, 1), code, speed);
6998 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7003 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7005 *total += rtx_cost (XEXP (x, 1), code, speed);
7006 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
7016 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
7017 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7018 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7019 *total += rtx_cost (XEXP (x, 1), code, speed);
7023 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7025 if (TARGET_HARD_FLOAT
7027 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7029 *total = COSTS_N_INSNS (1);
7032 *total = COSTS_N_INSNS (20);
7035 *total = COSTS_N_INSNS (1);
7037 *total += COSTS_N_INSNS (3);
7043 if (GET_MODE_CLASS (mode) == MODE_INT)
7045 rtx op = XEXP (x, 0);
7046 enum machine_mode opmode = GET_MODE (op);
7049 *total += COSTS_N_INSNS (1);
7051 if (opmode != SImode)
7055 /* If !arm_arch4, we use one of the extendhisi2_mem
7056 or movhi_bytes patterns for HImode. For a QImode
7057 sign extension, we first zero-extend from memory
7058 and then perform a shift sequence. */
7059 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7060 *total += COSTS_N_INSNS (2);
7063 *total += COSTS_N_INSNS (1);
7065 /* We don't have the necessary insn, so we need to perform some
7066 other operation. */
7067 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7068 /* An and with constant 255. */
7069 *total += COSTS_N_INSNS (1);
7071 /* A shift sequence. Increase costs slightly to avoid
7072 combining two shifts into an extend operation. */
7073 *total += COSTS_N_INSNS (2) + 1;
7079 switch (GET_MODE (XEXP (x, 0)))
7086 *total = COSTS_N_INSNS (1);
7096 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7100 if (const_ok_for_arm (INTVAL (x))
7101 || const_ok_for_arm (~INTVAL (x)))
7102 *total = COSTS_N_INSNS (1);
7104 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7105 INTVAL (x), NULL_RTX,
7106 NULL_RTX, 0, 0));
7112 *total = COSTS_N_INSNS (3);
7116 *total = COSTS_N_INSNS (1);
7120 *total = COSTS_N_INSNS (1);
7121 *total += rtx_cost (XEXP (x, 0), code, speed);
7125 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7126 && (mode == SFmode || !TARGET_VFP_SINGLE))
7127 *total = COSTS_N_INSNS (1);
7129 *total = COSTS_N_INSNS (4);
7133 *total = COSTS_N_INSNS (4);
7138 /* Estimates the size cost of thumb1 instructions.
7139 For now most of the code is copied from thumb1_rtx_costs. We need more
7140 fine-grained tuning when we have more related test cases. */
7142 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7144 enum machine_mode mode = GET_MODE (x);
7157 return COSTS_N_INSNS (1);
7160 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7162 /* The Thumb-1 mul instruction cannot operate on a constant. We must
7163 load it into a register first. */
7164 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7165 return COSTS_N_INSNS (1) + const_size;
7167 return COSTS_N_INSNS (1);
7170 return (COSTS_N_INSNS (1)
7171 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7172 + (GET_CODE (SET_DEST (x)) == MEM)));
7177 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7178 return COSTS_N_INSNS (1);
7179 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7180 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7181 return COSTS_N_INSNS (2);
7182 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7183 if (thumb_shiftable_const (INTVAL (x)))
7184 return COSTS_N_INSNS (2);
7185 return COSTS_N_INSNS (3);
7187 else if ((outer == PLUS || outer == COMPARE)
7188 && INTVAL (x) < 256 && INTVAL (x) > -256)
7190 else if ((outer == IOR || outer == XOR || outer == AND)
7191 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7192 return COSTS_N_INSNS (1);
7193 else if (outer == AND)
7196 /* This duplicates the tests in the andsi3 expander. */
7197 for (i = 9; i <= 31; i++)
7198 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7199 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7200 return COSTS_N_INSNS (2);
7202 else if (outer == ASHIFT || outer == ASHIFTRT
7203 || outer == LSHIFTRT)
7205 return COSTS_N_INSNS (2);
7211 return COSTS_N_INSNS (3);
7229 /* XXX another guess. */
7230 /* Memory costs quite a lot for the first word, but subsequent words
7231 load at the equivalent of a single insn each. */
7232 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7233 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7234 ? 4 : 0));
7238 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7243 /* XXX still guessing. */
7244 switch (GET_MODE (XEXP (x, 0)))
7247 return (1 + (mode == DImode ? 4 : 0)
7248 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7251 return (4 + (mode == DImode ? 4 : 0)
7252 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7255 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7266 /* RTX costs when optimizing for size. */
7268 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7269 int *total)
7271 enum machine_mode mode = GET_MODE (x);
7274 *total = thumb1_size_rtx_costs (x, code, outer_code);
7278 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7282 /* A memory access costs 1 insn if the mode is small, or the address is
7283 a single register, otherwise it costs one insn per word. */
7284 if (REG_P (XEXP (x, 0)))
7285 *total = COSTS_N_INSNS (1);
7287 && GET_CODE (XEXP (x, 0)) == PLUS
7288 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7289 /* This will be split into two instructions.
7290 See arm.md:calculate_pic_address. */
7291 *total = COSTS_N_INSNS (2);
7293 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7300 /* Needs a libcall, so it costs about this. */
7301 *total = COSTS_N_INSNS (2);
7305 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7307 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7315 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7317 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7320 else if (mode == SImode)
7322 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7323 /* Slightly disparage register shifts, but not by much. */
7324 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7325 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7329 /* Needs a libcall. */
7330 *total = COSTS_N_INSNS (2);
7334 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7335 && (mode == SFmode || !TARGET_VFP_SINGLE))
7337 *total = COSTS_N_INSNS (1);
7343 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7344 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7346 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7347 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7348 || subcode1 == ROTATE || subcode1 == ROTATERT
7349 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7350 || subcode1 == ASHIFTRT)
7352 /* It's just the cost of the two operands. */
7357 *total = COSTS_N_INSNS (1);
7361 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7365 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7366 && (mode == SFmode || !TARGET_VFP_SINGLE))
7368 *total = COSTS_N_INSNS (1);
7372 /* A shift as a part of ADD costs nothing. */
7373 if (GET_CODE (XEXP (x, 0)) == MULT
7374 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7376 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7377 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7378 *total += rtx_cost (XEXP (x, 1), code, false);
7383 case AND: case XOR: case IOR:
7386 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7388 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7389 || subcode == LSHIFTRT || subcode == ASHIFTRT
7390 || (code == AND && subcode == NOT))
7392 /* It's just the cost of the two operands. */
7398 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7402 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7406 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7407 && (mode == SFmode || !TARGET_VFP_SINGLE))
7409 *total = COSTS_N_INSNS (1);
7415 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7424 if (cc_register (XEXP (x, 0), VOIDmode))
7427 *total = COSTS_N_INSNS (1);
7431 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7432 && (mode == SFmode || !TARGET_VFP_SINGLE))
7433 *total = COSTS_N_INSNS (1);
7435 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7440 return arm_rtx_costs_1 (x, outer_code, total, 0);
7443 if (const_ok_for_arm (INTVAL (x)))
7444 /* A multiplication by a constant requires another instruction
7445 to load the constant to a register. */
7446 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7447 ? 1 : 0);
7448 else if (const_ok_for_arm (~INTVAL (x)))
7449 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7450 else if (const_ok_for_arm (-INTVAL (x)))
7452 if (outer_code == COMPARE || outer_code == PLUS
7453 || outer_code == MINUS)
7456 *total = COSTS_N_INSNS (1);
7459 *total = COSTS_N_INSNS (2);
7465 *total = COSTS_N_INSNS (2);
7469 *total = COSTS_N_INSNS (4);
7474 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7475 cost of these slightly. */
7476 *total = COSTS_N_INSNS (1) + 1;
7480 if (mode != VOIDmode)
7481 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7483 *total = COSTS_N_INSNS (4); /* Who knows? */
7488 /* RTX costs. Dispatch to the size or per-core speed cost routines. */
7490 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7491 bool speed)
7493 if (!speed)
7494 return arm_size_rtx_costs (x, (enum rtx_code) code,
7495 (enum rtx_code) outer_code, total);
7497 return current_tune->rtx_costs (x, (enum rtx_code) code,
7498 (enum rtx_code) outer_code,
7502 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7503 supported on any "slowmul" cores, so it can be ignored. */
7506 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7507 int *total, bool speed)
7509 enum machine_mode mode = GET_MODE (x);
7513 *total = thumb1_rtx_costs (x, code, outer_code);
7520 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7523 *total = COSTS_N_INSNS (20);
7527 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7529 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7530 & (unsigned HOST_WIDE_INT) 0xffffffff);
7531 int cost, const_ok = const_ok_for_arm (i);
7532 int j, booth_unit_size;
7534 /* Tune as appropriate. */
7535 cost = const_ok ? 4 : 8;
7536 booth_unit_size = 2;
7537 for (j = 0; i && j < 32; j += booth_unit_size)
7539 i >>= booth_unit_size;
7543 *total = COSTS_N_INSNS (cost);
7544 *total += rtx_cost (XEXP (x, 0), code, speed);
7548 *total = COSTS_N_INSNS (20);
7552 return arm_rtx_costs_1 (x, outer_code, total, speed);
7557 /* RTX cost for cores with a fast multiply unit (M variants). */
7560 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7561 int *total, bool speed)
7563 enum machine_mode mode = GET_MODE (x);
7567 *total = thumb1_rtx_costs (x, code, outer_code);
7571 /* ??? should thumb2 use different costs? */
7575 /* There is no point basing this on the tuning, since it is always the
7576 fast variant if it exists at all. */
7578 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7579 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7580 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7582 *total = COSTS_N_INSNS(2);
7589 *total = COSTS_N_INSNS (5);
7593 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7595 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7596 & (unsigned HOST_WIDE_INT) 0xffffffff);
7597 int cost, const_ok = const_ok_for_arm (i);
7598 int j, booth_unit_size;
7600 /* Tune as appropriate. */
7601 cost = const_ok ? 4 : 8;
7602 booth_unit_size = 8;
7603 for (j = 0; i && j < 32; j += booth_unit_size)
7605 i >>= booth_unit_size;
7609 *total = COSTS_N_INSNS(cost);
7615 *total = COSTS_N_INSNS (4);
7619 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7621 if (TARGET_HARD_FLOAT
7623 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7625 *total = COSTS_N_INSNS (1);
7630 /* Requires a lib call */
7631 *total = COSTS_N_INSNS (20);
7635 return arm_rtx_costs_1 (x, outer_code, total, speed);
7640 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7641 so it can be ignored. */
7644 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7645 int *total, bool speed)
7647 enum machine_mode mode = GET_MODE (x);
7651 *total = thumb1_rtx_costs (x, code, outer_code);
7658 if (GET_CODE (XEXP (x, 0)) != MULT)
7659 return arm_rtx_costs_1 (x, outer_code, total, speed);
7661 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7662 will stall until the multiplication is complete. */
7663 *total = COSTS_N_INSNS (3);
7667 /* There is no point basing this on the tuning, since it is always the
7668 fast variant if it exists at all. */
7670 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7671 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7672 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7674 *total = COSTS_N_INSNS (2);
7681 *total = COSTS_N_INSNS (5);
7685 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7687 /* If operand 1 is a constant we can more accurately
7688 calculate the cost of the multiply. The multiplier can
7689 retire 15 bits on the first cycle and a further 12 on the
7690 second. We do, of course, have to load the constant into
7691 a register first. */
7692 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7693 /* There's a general overhead of one cycle. */
7695 unsigned HOST_WIDE_INT masked_const;
7700 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7702 masked_const = i & 0xffff8000;
7703 if (masked_const != 0)
7706 masked_const = i & 0xf8000000;
7707 if (masked_const != 0)
7710 *total = COSTS_N_INSNS (cost);
7716 *total = COSTS_N_INSNS (3);
7720 /* Requires a lib call */
7721 *total = COSTS_N_INSNS (20);
7725 return arm_rtx_costs_1 (x, outer_code, total, speed);
7730 /* RTX costs for 9e (and later) cores. */
7733 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7734 int *total, bool speed)
7736 enum machine_mode mode = GET_MODE (x);
7743 *total = COSTS_N_INSNS (3);
7747 *total = thumb1_rtx_costs (x, code, outer_code);
7755 /* There is no point basing this on the tuning, since it is always the
7756 fast variant if it exists at all. */
7758 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7759 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7760 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7762 *total = COSTS_N_INSNS (2);
7769 *total = COSTS_N_INSNS (5);
7775 *total = COSTS_N_INSNS (2);
7779 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7781 if (TARGET_HARD_FLOAT
7783 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7785 *total = COSTS_N_INSNS (1);
7790 *total = COSTS_N_INSNS (20);
7794 return arm_rtx_costs_1 (x, outer_code, total, speed);
7797 /* All address computations that can be done are free, but rtx cost returns
7798 the same for practically all of them. So we weight the different types
7799 of address here in the order (most preferred first):
7800 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7802 arm_arm_address_cost (rtx x)
7804 enum rtx_code c = GET_CODE (x);
7806 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7808 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7813 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7816 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7826 arm_thumb_address_cost (rtx x)
7828 enum rtx_code c = GET_CODE (x);
7833 && GET_CODE (XEXP (x, 0)) == REG
7834 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7841 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7843 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7846 /* Adjust cost hook for XScale. */
7848 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7850 /* Some true dependencies can have a higher cost depending
7851 on precisely how certain input operands are used. */
7852 if (REG_NOTE_KIND(link) == 0
7853 && recog_memoized (insn) >= 0
7854 && recog_memoized (dep) >= 0)
7856 int shift_opnum = get_attr_shift (insn);
7857 enum attr_type attr_type = get_attr_type (dep);
7859 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7860 operand for INSN. If we have a shifted input operand and the
7861 instruction we depend on is another ALU instruction, then we may
7862 have to account for an additional stall. */
7863 if (shift_opnum != 0
7864 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7866 rtx shifted_operand;
7869 /* Get the shifted operand. */
7870 extract_insn (insn);
7871 shifted_operand = recog_data.operand[shift_opnum];
7873 /* Iterate over all the operands in DEP. If we write an operand
7874 that overlaps with SHIFTED_OPERAND, then we have to increase the
7875 cost of this dependency. */
7877 preprocess_constraints ();
7878 for (opno = 0; opno < recog_data.n_operands; opno++)
7880 /* We can ignore strict inputs. */
7881 if (recog_data.operand_type[opno] == OP_IN)
7884 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7896 /* Adjust cost hook for Cortex A9. */
7898 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7900 switch (REG_NOTE_KIND (link))
7907 case REG_DEP_OUTPUT:
7908 if (recog_memoized (insn) >= 0
7909 && recog_memoized (dep) >= 0)
7911 if (GET_CODE (PATTERN (insn)) == SET)
7913 if (GET_MODE_CLASS
7914 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7915 || GET_MODE_CLASS
7916 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7918 enum attr_type attr_type_insn = get_attr_type (insn);
7919 enum attr_type attr_type_dep = get_attr_type (dep);
7921 /* By default all dependencies of the form
7922 s0 = s0 <op> s1
7923 s0 = s0 <op> s2
7924 have an extra latency of 1 cycle because
7925 of the input and output dependency in this
7926 case. However this gets modeled as a true
7927 dependency and hence all these checks. */
7928 if (REG_P (SET_DEST (PATTERN (insn)))
7929 && REG_P (SET_DEST (PATTERN (dep)))
7930 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7931 SET_DEST (PATTERN (dep))))
7933 /* FMACS is a special case where the dependent
7934 instruction can be issued 3 cycles before
7935 the normal latency in case of an output
7936 dependency. */
7937 if ((attr_type_insn == TYPE_FMACS
7938 || attr_type_insn == TYPE_FMACD)
7939 && (attr_type_dep == TYPE_FMACS
7940 || attr_type_dep == TYPE_FMACD))
7942 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7943 *cost = insn_default_latency (dep) - 3;
7945 *cost = insn_default_latency (dep);
7950 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7951 *cost = insn_default_latency (dep) + 1;
7953 *cost = insn_default_latency (dep);
7969 /* Adjust cost hook for FA726TE. */
7971 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7973 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
7974 by a predicated one) carries a penalty of 3 cycles. */
7975 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
7976 && recog_memoized (insn) >= 0
7977 && recog_memoized (dep) >= 0
7978 && get_attr_conds (dep) == CONDS_SET)
7980 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
7981 if (get_attr_conds (insn) == CONDS_USE
7982 && get_attr_type (insn) != TYPE_BRANCH)
7988 if (GET_CODE (PATTERN (insn)) == COND_EXEC
7989 || get_attr_conds (insn) == CONDS_USE)
7999 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8000 It corrects the value of COST based on the relationship between
8001 INSN and DEP through the dependence LINK. It returns the new
8002 value. There is a per-core adjust_cost hook to adjust scheduler costs
8003 and the per-core hook can choose to completely override the generic
8004 adjust_cost function. Only put bits of code into arm_adjust_cost that
8005 are common across all cores. */
8007 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8011 /* When generating Thumb-1 code, we want to place flag-setting operations
8012 close to a conditional branch which depends on them, so that we can
8013 omit the comparison. */
8015 && REG_NOTE_KIND (link) == 0
8016 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8017 && recog_memoized (dep) >= 0
8018 && get_attr_conds (dep) == CONDS_SET)
8021 if (current_tune->sched_adjust_cost != NULL)
8023 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8027 /* XXX This is not strictly true for the FPA. */
8028 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8029 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8032 /* Call insns don't incur a stall, even if they follow a load. */
8033 if (REG_NOTE_KIND (link) == 0
8034 && GET_CODE (insn) == CALL_INSN)
8037 if ((i_pat = single_set (insn)) != NULL
8038 && GET_CODE (SET_SRC (i_pat)) == MEM
8039 && (d_pat = single_set (dep)) != NULL
8040 && GET_CODE (SET_DEST (d_pat)) == MEM)
8042 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8043 /* This is a load after a store; there is no conflict if the load reads
8044 from a cached area. Assume that loads from the stack, and from the
8045 constant pool are cached, and that others will miss. This is a
8046 hack. */
8048 if ((GET_CODE (src_mem) == SYMBOL_REF
8049 && CONSTANT_POOL_ADDRESS_P (src_mem))
8050 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8051 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8052 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8060 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8062 if (TARGET_32BIT)
8063 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8065 return (optimize > 0) ? 2 : 0;
8069 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8071 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8074 static int fp_consts_inited = 0;
8076 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8077 static const char * const strings_fp[8] =
8079 "0", "1", "2", "3",
8080 "4", "5", "0.5", "10"
8083 static REAL_VALUE_TYPE values_fp[8];
8086 init_fp_table (void)
8091 if (TARGET_VFP)
8092 fp_consts_inited = 1;
8093 else
8094 fp_consts_inited = 8;
8096 for (i = 0; i < fp_consts_inited; i++)
8098 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8103 /* Return TRUE if rtx X is a valid immediate FP constant. */
8105 arm_const_double_rtx (rtx x)
8110 if (!fp_consts_inited)
8113 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8114 if (REAL_VALUE_MINUS_ZERO (r))
8117 for (i = 0; i < fp_consts_inited; i++)
8118 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8124 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8126 neg_const_double_rtx_ok_for_fpa (rtx x)
8131 if (!fp_consts_inited)
8134 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8135 r = real_value_negate (&r);
8136 if (REAL_VALUE_MINUS_ZERO (r))
8139 for (i = 0; i < 8; i++)
8140 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8147 /* VFPv3 has a fairly wide range of representable immediates, formed from
8148 "quarter-precision" floating-point values. These can be evaluated using this
8149 formula (with ^ for exponentiation):
8151 valid = (-1)^s * (n/16) * 2^(4-r)
8153 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8154 16 <= n <= 31 and 0 <= r <= 7.
8156 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8158 - A (most-significant) is the sign bit.
8159 - BCD are the exponent (encoded as r XOR 3).
8160 - EFGH are the mantissa (encoded as n - 16).
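/* Worked examples (added; derived from the encoding above): 1.0 has
   s == 0, n == 16, r == 4, giving ABCDEFGH == 0 111 0000 == 0x70, and
   -2.0 has s == 1, n == 16, r == 3, giving 1 000 0000 == 0x80, matching
   the fconst[sd] immediate encodings for those values.  */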
8163 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8164 fconst[sd] instruction, or -1 if X isn't suitable. */
8166 vfp3_const_double_index (rtx x)
8168 REAL_VALUE_TYPE r, m;
8169 int sign, exponent;
8170 unsigned HOST_WIDE_INT mantissa, mant_hi;
8171 unsigned HOST_WIDE_INT mask;
8172 HOST_WIDE_INT m1, m2;
8173 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8175 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8178 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8180 /* We can't represent these things, so detect them first. */
8181 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8184 /* Extract sign, exponent and mantissa. */
8185 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8186 r = real_value_abs (&r);
8187 exponent = REAL_EXP (&r);
8188 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8189 highest (sign) bit, with a fixed binary point at bit point_pos.
8190 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8191 bits for the mantissa, this may fail (low bits would be lost). */
8192 real_ldexp (&m, &r, point_pos - exponent);
8193 REAL_VALUE_TO_INT (&m1, &m2, m);
8194 mantissa = m1;
8195 mant_hi = m2;
8197 /* If there are bits set in the low part of the mantissa, we can't
8198 represent this value. */
8199 if (mantissa != 0)
8200 return -1;
8202 /* Now make it so that mantissa contains the most-significant bits, and move
8203 the point_pos to indicate that the least-significant bits have been
8204 discarded. */
8205 point_pos -= HOST_BITS_PER_WIDE_INT;
8206 mantissa = mant_hi;
8208 /* We can permit four significant bits of mantissa only, plus a high bit
8209 which is always 1. */
8210 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8211 if ((mantissa & mask) != 0)
8214 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8215 mantissa >>= point_pos - 5;
8217 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8218 floating-point immediate zero with Neon using an integer-zero load, but
8219 that case is handled elsewhere.) */
8220 if (mantissa == 0)
8221 return -1;
8223 gcc_assert (mantissa >= 16 && mantissa <= 31);
8225 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8226 normalized significands are in the range [1, 2). (Our mantissa is shifted
8227 left 4 places at this point relative to normalized IEEE754 values). GCC
8228 internally uses [0.5, 1) (see real.c), so the exponent returned from
8229 REAL_EXP must be altered. */
8230 exponent = 5 - exponent;
8232 if (exponent < 0 || exponent > 7)
8235 /* Sign, mantissa and exponent are now in the correct form to plug into the
8236 formula described in the comment above. */
8237 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8240 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8242 vfp3_const_double_rtx (rtx x)
8247 return vfp3_const_double_index (x) != -1;
8250 /* Recognize immediates which can be used in various Neon instructions. Legal
8251 immediates are described by the following table (for VMVN variants, the
8252 bitwise inverse of the constant shown is recognized. In either case, VMOV
8253 is output and the correct instruction to use for a given constant is chosen
8254 by the assembler). The constant shown is replicated across all elements of
8255 the destination vector.
8257 insn elems variant constant (binary)
8258 ---- ----- ------- -----------------
8259 vmov i32 0 00000000 00000000 00000000 abcdefgh
8260 vmov i32 1 00000000 00000000 abcdefgh 00000000
8261 vmov i32 2 00000000 abcdefgh 00000000 00000000
8262 vmov i32 3 abcdefgh 00000000 00000000 00000000
8263 vmov i16 4 00000000 abcdefgh
8264 vmov i16 5 abcdefgh 00000000
8265 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8266 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8267 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8268 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8269 vmvn i16 10 00000000 abcdefgh
8270 vmvn i16 11 abcdefgh 00000000
8271 vmov i32 12 00000000 00000000 abcdefgh 11111111
8272 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8273 vmov i32 14 00000000 abcdefgh 11111111 11111111
8274 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8275 vmov i8 16 abcdefgh
8276 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8277 eeeeeeee ffffffff gggggggg hhhhhhhh
8278 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8280 For case 18, B = !b. Representable values are exactly those accepted by
8281 vfp3_const_double_index, but are output as floating-point numbers rather
8282 than indices.
8284 Variants 0-5 (inclusive) may also be used as immediates for the second
8285 operand of VORR/VBIC instructions.
8287 The INVERSE argument causes the bitwise inverse of the given operand to be
8288 recognized instead (used for recognizing legal immediates for the VAND/VORN
8289 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8290 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8291 output, rather than the real insns vbic/vorr).
8293 INVERSE makes no difference to the recognition of float vectors.
8295 The return value is the variant of immediate as shown in the above table, or
8296 -1 if the given value doesn't match any of the listed patterns.
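/* Example (added; not in the original source): a V4SImode constant with
   every element equal to 0x0000ab00 matches variant 1 above, so the
   return value is 1 with *ELEMENTWIDTH == 32, and the assembler sees
   something like "vmov.i32 d0, #0xab00".  */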
8299 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8300 rtx *modconst, int *elementwidth)
8302 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8303 matches = 1; \
8304 for (i = 0; i < idx; i += (STRIDE)) \
8305 if (!(TEST)) matches = 0; \
8307 if (matches) \
8308 { immtype = (CLASS); elsize = (ELSIZE); break; }
8314 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8315 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8316 unsigned char bytes[16];
8317 int immtype = -1, matches;
8318 unsigned int invmask = inverse ? 0xff : 0;
8320 /* Vectors of float constants. */
8321 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8323 rtx el0 = CONST_VECTOR_ELT (op, 0);
8326 if (!vfp3_const_double_rtx (el0))
8329 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8331 for (i = 1; i < n_elts; i++)
8333 rtx elt = CONST_VECTOR_ELT (op, i);
8336 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8338 if (!REAL_VALUES_EQUAL (r0, re))
8343 *modconst = CONST_VECTOR_ELT (op, 0);
8351 /* Splat vector constant out into a byte vector. */
8352 for (i = 0; i < n_elts; i++)
8354 rtx el = CONST_VECTOR_ELT (op, i);
8355 unsigned HOST_WIDE_INT elpart;
8356 unsigned int part, parts;
8358 if (GET_CODE (el) == CONST_INT)
8360 elpart = INTVAL (el);
8363 else if (GET_CODE (el) == CONST_DOUBLE)
8365 elpart = CONST_DOUBLE_LOW (el);
8371 for (part = 0; part < parts; part++)
8374 for (byte = 0; byte < innersize; byte++)
8376 bytes[idx++] = (elpart & 0xff) ^ invmask;
8377 elpart >>= BITS_PER_UNIT;
8379 if (GET_CODE (el) == CONST_DOUBLE)
8380 elpart = CONST_DOUBLE_HIGH (el);
8385 gcc_assert (idx == GET_MODE_SIZE (mode));
8389 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8390 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8392 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8393 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8395 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8396 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8398 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8399 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8401 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8403 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8405 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8406 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8408 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8409 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8411 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8412 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8414 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8415 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8417 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8419 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8421 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8422 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8424 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8425 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8427 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8428 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8430 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8431 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8433 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8435 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8436 && bytes[i] == bytes[(i + 8) % idx]);
8444 *elementwidth = elsize;
8448 unsigned HOST_WIDE_INT imm = 0;
8450 /* Un-invert bytes of recognized vector, if necessary. */
8452 for (i = 0; i < idx; i++)
8453 bytes[i] ^= invmask;
8457 /* FIXME: Broken on 32-bit H_W_I hosts. */
8458 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8460 for (i = 0; i < 8; i++)
8461 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8462 << (i * BITS_PER_UNIT);
8464 *modconst = GEN_INT (imm);
8468 unsigned HOST_WIDE_INT imm = 0;
8470 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8471 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8473 *modconst = GEN_INT (imm);
8481 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8482 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8483 float elements), and a modified constant (whatever should be output for a
8484 VMOV) in *MODCONST. */
8487 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8488 rtx *modconst, int *elementwidth)
8492 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8498 *modconst = tmpconst;
8501 *elementwidth = tmpwidth;
8506 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8507 the immediate is valid, write a constant suitable for using as an operand
8508 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8509 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8512 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8513 rtx *modconst, int *elementwidth)
8517 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8519 if (retval < 0 || retval > 5)
8523 *modconst = tmpconst;
8526 *elementwidth = tmpwidth;
8531 /* Return a string suitable for output of Neon immediate logic operation
8532 MNEM. */
8535 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8536 int inverse, int quad)
8538 int width, is_valid;
8539 static char templ[40];
8541 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8543 gcc_assert (is_valid != 0);
8546 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8548 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8553 /* Output a sequence of pairwise operations to implement a reduction.
8554 NOTE: We do "too much work" here, because pairwise operations work on two
8555 registers' worth of operands in one go. Unfortunately we do not think we
8556 can exploit those extra calculations to do the full operation in fewer steps.
8557 Although all vector elements of the result but the first are ignored, we
8558 actually calculate the same result in each of the elements. An alternative
8559 such as initially loading a vector with zero to use as each of the second
8560 operands would use up an additional register and take an extra instruction,
8561 for no particular gain. */
8564 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8565 rtx (*reduc) (rtx, rtx, rtx))
8567 enum machine_mode inner = GET_MODE_INNER (mode);
8568 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8569 rtx tmpsum = op1;
8571 for (i = parts / 2; i >= 1; i /= 2)
8573 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8574 emit_insn (reduc (dest, tmpsum, tmpsum));
8575 tmpsum = dest;
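/* Usage sketch (added; the generator name is illustrative, not taken
   from this file): summing a V2SImode vector into both lanes could be
   requested as
     neon_pairwise_reduce (dest, src, V2SImode, gen_neon_vpadd_internalv2si);
   Here parts == 2, so the loop runs once (i == 1) and emits a single
   pairwise add whose destination holds the full sum in each element.  */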
8579 /* If VALS is a vector constant that can be loaded into a register
8580 using VDUP, generate instructions to do so and return an RTX to
8581 assign to the register. Otherwise return NULL_RTX. */
8584 neon_vdup_constant (rtx vals)
8586 enum machine_mode mode = GET_MODE (vals);
8587 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8588 int n_elts = GET_MODE_NUNITS (mode);
8589 bool all_same = true;
8593 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8596 for (i = 0; i < n_elts; ++i)
8598 x = XVECEXP (vals, 0, i);
8599 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8604 /* The elements are not all the same. We could handle repeating
8605 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8606 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8607 vdup.i16). */
8610 /* We can load this constant by using VDUP and a constant in a
8611 single ARM register. This will be cheaper than a vector
8612 load. */
8614 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8615 return gen_rtx_VEC_DUPLICATE (mode, x);
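/* Example (added for illustration): for the CONST_VECTOR
   {5, 5, 5, 5} in V4HImode, all elements compare equal, so the scalar 5
   is copied into an HImode register and a (vec_duplicate:V4HI ...) is
   returned for the vdup patterns to match.  */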
8618 /* Generate code to load VALS, which is a PARALLEL containing only
8619 constants (for vec_init) or CONST_VECTOR, efficiently into a
8620 register. Returns an RTX to copy into the register, or NULL_RTX
8621 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8624 neon_make_constant (rtx vals)
8626 enum machine_mode mode = GET_MODE (vals);
8628 rtx const_vec = NULL_RTX;
8629 int n_elts = GET_MODE_NUNITS (mode);
8633 if (GET_CODE (vals) == CONST_VECTOR)
8635 else if (GET_CODE (vals) == PARALLEL)
8637 /* A CONST_VECTOR must contain only CONST_INTs and
8638 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8639 Only store valid constants in a CONST_VECTOR. */
8640 for (i = 0; i < n_elts; ++i)
8642 rtx x = XVECEXP (vals, 0, i);
8643 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8646 if (n_const == n_elts)
8647 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8652 if (const_vec != NULL
8653 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8654 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8656 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8657 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8658 pipeline cycle; creating the constant takes one or two ARM
8659 pipeline cycles. */
8661 else if (const_vec != NULL_RTX)
8662 /* Load from constant pool. On Cortex-A8 this takes two cycles
8663 (for either double or quad vectors). We can not take advantage
8664 of single-cycle VLD1 because we need a PC-relative addressing
8665 mode. */
8668 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8669 We can not construct an initializer. */
8673 /* Initialize vector TARGET to VALS. */
8676 neon_expand_vector_init (rtx target, rtx vals)
8678 enum machine_mode mode = GET_MODE (target);
8679 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8680 int n_elts = GET_MODE_NUNITS (mode);
8681 int n_var = 0, one_var = -1;
8682 bool all_same = true;
8686 for (i = 0; i < n_elts; ++i)
8688 x = XVECEXP (vals, 0, i);
8689 if (!CONSTANT_P (x))
8690 ++n_var, one_var = i;
8692 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8698 rtx constant = neon_make_constant (vals);
8699 if (constant != NULL_RTX)
8701 emit_move_insn (target, constant);
8706 /* Splat a single non-constant element if we can. */
8707 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8709 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8710 emit_insn (gen_rtx_SET (VOIDmode, target,
8711 gen_rtx_VEC_DUPLICATE (mode, x)));
8715 /* One field is non-constant. Load constant then overwrite varying
8716 field. This is more efficient than using the stack. */
8719 rtx copy = copy_rtx (vals);
8720 rtx index = GEN_INT (one_var);
8722 /* Load constant part of vector, substitute neighboring value for
8723 varying element. */
8724 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8725 neon_expand_vector_init (target, copy);
8727 /* Insert variable. */
8728 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8732 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8735 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8738 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8741 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8744 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8747 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8750 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8753 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8756 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8764 /* Construct the vector in memory one field at a time
8765 and load the whole vector. */
8766 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8767 for (i = 0; i < n_elts; i++)
8768 emit_move_insn (adjust_address_nv (mem, inner_mode,
8769 i * GET_MODE_SIZE (inner_mode)),
8770 XVECEXP (vals, 0, i));
8771 emit_move_insn (target, mem);
8774 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8775 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8776 reported source locations are bogus. */
8779 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8780 const char *err)
8784 gcc_assert (GET_CODE (operand) == CONST_INT);
8786 lane = INTVAL (operand);
8788 if (lane < low || lane >= high)
8789 error (err);
8792 /* Bounds-check lanes. */
8795 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8797 bounds_check (operand, low, high, "lane out of range");
8800 /* Bounds-check constants. */
8803 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8805 bounds_check (operand, low, high, "constant out of range");
8809 neon_element_bits (enum machine_mode mode)
8812 return GET_MODE_BITSIZE (mode);
8814 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
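/* Examples (added): neon_element_bits (DImode) == 64, while
   neon_element_bits (V8QImode) == GET_MODE_BITSIZE (QImode) == 8.  */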
8818 /* Predicates for `match_operand' and `match_operator'. */
8820 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8822 cirrus_memory_offset (rtx op)
8824 /* Reject eliminable registers. */
8825 if (! (reload_in_progress || reload_completed)
8826 && ( reg_mentioned_p (frame_pointer_rtx, op)
8827 || reg_mentioned_p (arg_pointer_rtx, op)
8828 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8829 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8830 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8831 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8834 if (GET_CODE (op) == MEM)
8840 /* Match: (mem (reg)). */
8841 if (GET_CODE (ind) == REG)
8847 if (GET_CODE (ind) == PLUS
8848 && GET_CODE (XEXP (ind, 0)) == REG
8849 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8850 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8857 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8858 WB is true if full writeback address modes are allowed and is false
8859 if limited writeback address modes (POST_INC and PRE_DEC) are
8860 allowed. */
8863 arm_coproc_mem_operand (rtx op, bool wb)
8867 /* Reject eliminable registers. */
8868 if (! (reload_in_progress || reload_completed)
8869 && ( reg_mentioned_p (frame_pointer_rtx, op)
8870 || reg_mentioned_p (arg_pointer_rtx, op)
8871 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8872 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8873 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8874 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8877 /* Constants are converted into offsets from labels. */
8878 if (GET_CODE (op) != MEM)
8883 if (reload_completed
8884 && (GET_CODE (ind) == LABEL_REF
8885 || (GET_CODE (ind) == CONST
8886 && GET_CODE (XEXP (ind, 0)) == PLUS
8887 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8888 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8891 /* Match: (mem (reg)). */
8892 if (GET_CODE (ind) == REG)
8893 return arm_address_register_rtx_p (ind, 0);
8895 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8896 acceptable in any case (subject to verification by
8897 arm_address_register_rtx_p). We need WB to be true to accept
8898 PRE_INC and POST_DEC. */
8899 if (GET_CODE (ind) == POST_INC
8900 || GET_CODE (ind) == PRE_DEC
8901 || (wb
8902 && (GET_CODE (ind) == PRE_INC
8903 || GET_CODE (ind) == POST_DEC)))
8904 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8906 if (wb
8907 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8908 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8909 && GET_CODE (XEXP (ind, 1)) == PLUS
8910 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8911 ind = XEXP (ind, 1);
8916 if (GET_CODE (ind) == PLUS
8917 && GET_CODE (XEXP (ind, 0)) == REG
8918 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8919 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8920 && INTVAL (XEXP (ind, 1)) > -1024
8921 && INTVAL (XEXP (ind, 1)) < 1024
8922 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
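/* The offset test above corresponds to what the coprocessor load/store
   encodings can express: a word-aligned displacement held in an 8-bit
   immediate scaled by 4, i.e. -1020 .. 1020.  A standalone restatement
   (guarded out of the build; the demo_ name is hypothetical):  */
#if 0
#include <assert.h>

static int
demo_coproc_offset_ok (long off)
{
  return off > -1024 && off < 1024 && (off & 3) == 0;
}

int
main (void)
{
  assert (demo_coproc_offset_ok (1020));  /* 255 * 4, largest encodable */
  assert (!demo_coproc_offset_ok (1022)); /* not word-aligned */
  assert (!demo_coproc_offset_ok (1024)); /* out of range */
  return 0;
}
#endif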
8928 /* Return TRUE if OP is a memory operand which we can load or store a vector
8929 to/from. TYPE is one of the following values:
8930 0 - Vector load/store (vldr)
8931 1 - Core registers (ldm)
8932 2 - Element/structure loads (vld1)
8935 neon_vector_mem_operand (rtx op, int type)
8939 /* Reject eliminable registers. */
8940 if (! (reload_in_progress || reload_completed)
8941 && ( reg_mentioned_p (frame_pointer_rtx, op)
8942 || reg_mentioned_p (arg_pointer_rtx, op)
8943 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8944 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8945 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8946 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8949 /* Constants are converted into offsets from labels. */
8950 if (GET_CODE (op) != MEM)
8955 if (reload_completed
8956 && (GET_CODE (ind) == LABEL_REF
8957 || (GET_CODE (ind) == CONST
8958 && GET_CODE (XEXP (ind, 0)) == PLUS
8959 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8960 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8963 /* Match: (mem (reg)). */
8964 if (GET_CODE (ind) == REG)
8965 return arm_address_register_rtx_p (ind, 0);
8967 /* Allow post-increment with Neon registers. */
8968 if ((type != 1 && GET_CODE (ind) == POST_INC)
8969 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8970 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8972 /* FIXME: vld1 allows register post-modify. */
8977 if (type == 0
8978 && GET_CODE (ind) == PLUS
8979 && GET_CODE (XEXP (ind, 0)) == REG
8980 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8981 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8982 && INTVAL (XEXP (ind, 1)) > -1024
8983 && INTVAL (XEXP (ind, 1)) < 1016
8984 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8990 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8991 type. */
8993 neon_struct_mem_operand (rtx op)
8997 /* Reject eliminable registers. */
8998 if (! (reload_in_progress || reload_completed)
8999 && ( reg_mentioned_p (frame_pointer_rtx, op)
9000 || reg_mentioned_p (arg_pointer_rtx, op)
9001 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9002 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9003 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9004 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9007 /* Constants are converted into offsets from labels. */
9008 if (GET_CODE (op) != MEM)
9013 if (reload_completed
9014 && (GET_CODE (ind) == LABEL_REF
9015 || (GET_CODE (ind) == CONST
9016 && GET_CODE (XEXP (ind, 0)) == PLUS
9017 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9018 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9021 /* Match: (mem (reg)). */
9022 if (GET_CODE (ind) == REG)
9023 return arm_address_register_rtx_p (ind, 0);
9025 /* vldm/vstm allow POST_INC (ia) and PRE_DEC (db). */
9026 if (GET_CODE (ind) == POST_INC
9027 || GET_CODE (ind) == PRE_DEC)
9028 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9033 /* Return true if X is a register that will be eliminated later on. */
9035 arm_eliminable_register (rtx x)
9037 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9038 || REGNO (x) == ARG_POINTER_REGNUM
9039 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9040 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9043 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
9044 coprocessor registers. Otherwise return NO_REGS. */
9047 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9051 if (!TARGET_NEON_FP16)
9052 return GENERAL_REGS;
9053 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9055 return GENERAL_REGS;
9058 /* The neon move patterns handle all legitimate vector and struct
9062 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9063 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9064 || VALID_NEON_STRUCT_MODE (mode)))
9067 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9070 return GENERAL_REGS;
9073 /* Values which must be returned in the most-significant end of the return
9074 register. */
9077 arm_return_in_msb (const_tree valtype)
9079 return (TARGET_AAPCS_BASED
9081 && (AGGREGATE_TYPE_P (valtype)
9082 || TREE_CODE (valtype) == COMPLEX_TYPE));
9085 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9086 Used by the Cirrus Maverick code which has to work around
9087 a hardware bug triggered by such instructions. */
9089 arm_memory_load_p (rtx insn)
9091 rtx body, lhs, rhs;
9093 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9096 body = PATTERN (insn);
9098 if (GET_CODE (body) != SET)
9101 lhs = XEXP (body, 0);
9102 rhs = XEXP (body, 1);
9104 lhs = REG_OR_SUBREG_RTX (lhs);
9106 /* If the destination is not a general purpose
9107 register we do not have to worry. */
9108 if (GET_CODE (lhs) != REG
9109 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9112 /* As well as loads from memory we also have to react
9113 to loads of invalid constants which will be turned
9114 into loads from the minipool. */
9115 return (GET_CODE (rhs) == MEM
9116 || GET_CODE (rhs) == SYMBOL_REF
9117 || note_invalid_constants (insn, -1, false));
9120 /* Return TRUE if INSN is a Cirrus instruction. */
9122 arm_cirrus_insn_p (rtx insn)
9124 enum attr_cirrus attr;
9126 /* get_attr cannot accept USE or CLOBBER. */
9128 || GET_CODE (insn) != INSN
9129 || GET_CODE (PATTERN (insn)) == USE
9130 || GET_CODE (PATTERN (insn)) == CLOBBER)
9133 attr = get_attr_cirrus (insn);
9135 return attr != CIRRUS_NOT;
9138 /* Cirrus reorg for invalid instruction combinations. */
9140 cirrus_reorg (rtx first)
9142 enum attr_cirrus attr;
9143 rtx body = PATTERN (first);
9147 /* Any branch must be followed by 2 non-Cirrus instructions. */
9148 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9151 t = next_nonnote_insn (first);
9153 if (arm_cirrus_insn_p (t))
9156 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9160 emit_insn_after (gen_nop (), first);
9165 /* (float (blah)) is in parallel with a clobber. */
9166 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9167 body = XVECEXP (body, 0, 0);
9169 if (GET_CODE (body) == SET)
9171 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9173 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9174 be followed by a non-Cirrus insn. */
9175 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9177 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9178 emit_insn_after (gen_nop (), first);
9182 else if (arm_memory_load_p (first))
9184 unsigned int arm_regno;
9186 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9187 ldr/cfmv64hr combination where the Rd field is the same
9188 in both instructions must be split with a non-Cirrus
9195 /* Get Arm register number for ldr insn. */
9196 if (GET_CODE (lhs) == REG)
9197 arm_regno = REGNO (lhs);
9200 gcc_assert (GET_CODE (rhs) == REG);
9201 arm_regno = REGNO (rhs);
9205 first = next_nonnote_insn (first);
9207 if (! arm_cirrus_insn_p (first))
9210 body = PATTERN (first);
9212 /* (float (blah)) is in parallel with a clobber. */
9213 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9214 body = XVECEXP (body, 0, 0);
9216 if (GET_CODE (body) == FLOAT)
9217 body = XEXP (body, 0);
9219 if (get_attr_cirrus (first) == CIRRUS_MOVE
9220 && GET_CODE (XEXP (body, 1)) == REG
9221 && arm_regno == REGNO (XEXP (body, 1)))
9222 emit_insn_after (gen_nop (), first);
9228 /* get_attr cannot accept USE or CLOBBER. */
9230 || GET_CODE (first) != INSN
9231 || GET_CODE (PATTERN (first)) == USE
9232 || GET_CODE (PATTERN (first)) == CLOBBER)
9235 attr = get_attr_cirrus (first);
9237 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9238 must be followed by a non-coprocessor instruction. */
9239 if (attr == CIRRUS_COMPARE)
9243 t = next_nonnote_insn (first);
9245 if (arm_cirrus_insn_p (t))
9248 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9252 emit_insn_after (gen_nop (), first);
9258 /* Return TRUE if X references a SYMBOL_REF. */
9260 symbol_mentioned_p (rtx x)
9265 if (GET_CODE (x) == SYMBOL_REF)
9268 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9269 are constant offsets, not symbols. */
9270 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9273 fmt = GET_RTX_FORMAT (GET_CODE (x));
9275 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9281 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9282 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9285 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9292 /* Return TRUE if X references a LABEL_REF. */
9294 label_mentioned_p (rtx x)
9299 if (GET_CODE (x) == LABEL_REF)
9302 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9303 instruction, but they are constant offsets, not symbols. */
9304 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9307 fmt = GET_RTX_FORMAT (GET_CODE (x));
9308 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9314 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9315 if (label_mentioned_p (XVECEXP (x, i, j)))
9318 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9326 tls_mentioned_p (rtx x)
9328 switch (GET_CODE (x))
9331 return tls_mentioned_p (XEXP (x, 0));
9334 if (XINT (x, 1) == UNSPEC_TLS)
9342 /* Must not copy any rtx that uses a pc-relative address. */
9345 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9347 if (GET_CODE (*x) == UNSPEC
9348 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9354 arm_cannot_copy_insn_p (rtx insn)
9356 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9362 enum rtx_code code = GET_CODE (x);
9379 /* Return 1 if memory locations are adjacent. */
9381 adjacent_mem_locations (rtx a, rtx b)
9383 /* We don't guarantee to preserve the order of these memory refs. */
9384 if (volatile_refs_p (a) || volatile_refs_p (b))
9387 if ((GET_CODE (XEXP (a, 0)) == REG
9388 || (GET_CODE (XEXP (a, 0)) == PLUS
9389 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9390 && (GET_CODE (XEXP (b, 0)) == REG
9391 || (GET_CODE (XEXP (b, 0)) == PLUS
9392 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9394 HOST_WIDE_INT val0 = 0, val1 = 0;
9398 if (GET_CODE (XEXP (a, 0)) == PLUS)
9400 reg0 = XEXP (XEXP (a, 0), 0);
9401 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9406 if (GET_CODE (XEXP (b, 0)) == PLUS)
9408 reg1 = XEXP (XEXP (b, 0), 0);
9409 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9414 /* Don't accept any offset that will require multiple
9415 instructions to handle, since this would cause the
9416 arith_adjacentmem pattern to output an overlong sequence. */
9417 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9420 /* Don't allow an eliminable register: register elimination can make
9421 the offset too large. */
9422 if (arm_eliminable_register (reg0))
9425 val_diff = val1 - val0;
9429 /* If the target has load delay slots, then there's no benefit
9430 to using an ldm instruction unless the offset is zero and
9431 we are optimizing for size. */
9432 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9433 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9434 && (val_diff == 4 || val_diff == -4));
9437 return ((REGNO (reg0) == REGNO (reg1))
9438 && (val_diff == 4 || val_diff == -4));
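/* A standalone restatement of the adjacency rule above: two fixed-offset
   references can feed arith_adjacentmem only if they use the same base
   register and their offsets differ by exactly one word.  (Guarded out
   of the build; the demo_ names are hypothetical.)  */
#if 0
#include <assert.h>

static int
demo_adjacent (int reg0, long val0, int reg1, long val1)
{
  long diff = val1 - val0;
  return reg0 == reg1 && (diff == 4 || diff == -4);
}

int
main (void)
{
  assert (demo_adjacent (3, 8, 3, 12));	 /* [r3, #8] and [r3, #12] */
  assert (!demo_adjacent (3, 8, 4, 12)); /* different base registers */
  assert (!demo_adjacent (3, 8, 3, 16)); /* two words apart */
  return 0;
}
#endif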
9444 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9445 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9446 instruction. ADD_OFFSET is nonzero if the base address register needs
9447 to be modified with an add instruction before we can use it. */
9450 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9451 int nops, HOST_WIDE_INT add_offset)
9453 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9454 if the offset isn't small enough. The reason 2 ldrs are faster
9455 is because these ARMs are able to do more than one cache access
9456 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9457 whilst the ARM8 has a double bandwidth cache. This means that
9458 these cores can do both an instruction fetch and a data fetch in
9459 a single cycle, so the trick of calculating the address into a
9460 scratch register (one of the result regs) and then doing a load
9461 multiple actually becomes slower (and no smaller in code size).
9462 That is the transformation
9464 ldr rd1, [rbase + offset]
9465 ldr rd2, [rbase + offset + 4]
9469 add rd1, rbase, offset
9470 ldmia rd1, {rd1, rd2}
9472 produces worse code -- '3 cycles + any stalls on rd2' instead of
9473 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9474 access per cycle, the first sequence could never complete in less
9475 than 6 cycles, whereas the ldm sequence would only take 5 and
9476 would make better use of sequential accesses if not hitting the
9479 We cheat here and test 'arm_ld_sched' which we currently know to
9480 only be true for the ARM8, ARM9 and StrongARM. If this ever
9481 changes, then the test below needs to be reworked. */
9482 if (nops == 2 && arm_ld_sched && add_offset != 0)
9485 /* XScale has load-store double instructions, but they have stricter
9486 alignment requirements than load-store multiple, so we cannot
9489 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9490 the pipeline until completion.
9498 An ldr instruction takes 1-3 cycles, but does not block the
9507 Best case ldr will always win. However, the more ldr instructions
9508 we issue, the less likely we are to be able to schedule them well.
9509 Using ldr instructions also increases code size.
9511 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9512 for counts of 3 or 4 regs. */
9513 if (nops <= 2 && arm_tune_xscale && !optimize_size)
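/* A rough standalone sketch of the XScale compromise above, using the
   cycle figures quoted in the comment: an ldm of N registers costs about
   2 + N blocking cycles, while well-scheduled ldrs approach one cycle
   each.  (Guarded out of the build; the demo_ names and the flat
   one-cycle ldr estimate are illustrative assumptions, not a real
   pipeline model.)  */
#if 0
#include <stdio.h>

static const char *
demo_xscale_choice (int nregs, int optimizing_for_size)
{
  int ldm_cycles = 2 + nregs;	/* blocks the pipeline until complete */
  int ldr_cycles = nregs;	/* best case: fully pipelined ldrs */

  if (optimizing_for_size)
    return "ldm";		/* one instruction is always smaller */
  return (nregs <= 2 && ldr_cycles < ldm_cycles) ? "ldr" : "ldm";
}

int
main (void)
{
  printf ("2 regs: %s\n", demo_xscale_choice (2, 0));	/* ldr */
  printf ("4 regs: %s\n", demo_xscale_choice (4, 0));	/* ldm */
  return 0;
}
#endif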
9518 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9519 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9520 an array ORDER which describes the sequence to use when accessing the
9521 offsets that produces an ascending order. In this sequence, each
9522 offset must be larger by exactly 4 than the previous one. ORDER[0]
9523 must have been filled in with the lowest offset by the caller.
9524 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9525 we use to verify that ORDER produces an ascending order of registers.
9526 Return true if it was possible to construct such an order, false if
9530 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9534 for (i = 1; i < nops; i++)
9538 order[i] = order[i - 1];
9539 for (j = 0; j < nops; j++)
9540 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9542 /* We must find exactly one offset that is higher than the
9543 previous one by 4. */
9544 if (order[i] != order[i - 1])
9548 if (order[i] == order[i - 1])
9550 /* The register numbers must be ascending. */
9551 if (unsorted_regs != NULL
9552 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
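/* A self-contained restatement of the check above: seed ORDER[0] with
   the smallest offset, then each step must find exactly one offset that
   is 4 larger than the previous one.  (Guarded out of the build; the
   demo_ names are hypothetical.)  */
#if 0
#include <stdio.h>

static int
demo_offset_order (int nops, const long *offsets, int *order)
{
  int i, j;

  /* The caller's job in the real code: fill in the lowest offset.  */
  order[0] = 0;
  for (j = 1; j < nops; j++)
    if (offsets[j] < offsets[order[0]])
      order[0] = j;

  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (offsets[j] == offsets[order[i - 1]] + 4)
	  {
	    if (order[i] != order[i - 1])
	      return 0;		/* duplicate successor offsets */
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return 0;		/* no successor: offsets not contiguous */
    }
  return 1;
}

int
main (void)
{
  long offsets[4] = { 8, 0, 12, 4 };
  int order[4];

  if (demo_offset_order (4, offsets, order))
    printf ("%d %d %d %d\n",	/* prints 1 3 0 2 */
	    order[0], order[1], order[2], order[3]);
  return 0;
}
#endif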
9558 /* Used to determine in a peephole whether a sequence of load
9559 instructions can be changed into a load-multiple instruction.
9560 NOPS is the number of separate load instructions we are examining. The
9561 first NOPS entries in OPERANDS are the destination registers, the
9562 next NOPS entries are memory operands. If this function is
9563 successful, *BASE is set to the common base register of the memory
9564 accesses; *LOAD_OFFSET is set to the first memory location's offset
9565 from that base register.
9566 REGS is an array filled in with the destination register numbers.
9567 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9568 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9569 the sequence of registers in REGS matches the loads from ascending memory
9570 locations, and the function verifies that the register numbers are
9571 themselves ascending. If CHECK_REGS is false, the register numbers
9572 are stored in the order they are found in the operands. */
9574 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9575 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9577 int unsorted_regs[MAX_LDM_STM_OPS];
9578 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9579 int order[MAX_LDM_STM_OPS];
9580 rtx base_reg_rtx = NULL;
9584 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9585 easily extended if required. */
9586 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9588 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9590 /* Loop over the operands and check that the memory references are
9591 suitable (i.e. immediate offsets from the same base register). At
9592 the same time, extract the target register, and the memory
9593 offsets. */
9594 for (i = 0; i < nops; i++)
9599 /* Convert a subreg of a mem into the mem itself. */
9600 if (GET_CODE (operands[nops + i]) == SUBREG)
9601 operands[nops + i] = alter_subreg (operands + (nops + i));
9603 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9605 /* Don't reorder volatile memory references; it doesn't seem worth
9606 looking for the case where the order is ok anyway. */
9607 if (MEM_VOLATILE_P (operands[nops + i]))
9610 offset = const0_rtx;
9612 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9613 || (GET_CODE (reg) == SUBREG
9614 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9615 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9616 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9618 || (GET_CODE (reg) == SUBREG
9619 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9620 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9625 base_reg = REGNO (reg);
9627 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9630 else if (base_reg != (int) REGNO (reg))
9631 /* Not addressed from the same base register. */
9634 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9635 ? REGNO (operands[i])
9636 : REGNO (SUBREG_REG (operands[i])));
9638 /* If it isn't an integer register, or if it overwrites the
9639 base register but isn't the last insn in the list, then
9640 we can't do this. */
9641 if (unsorted_regs[i] < 0
9642 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9643 || unsorted_regs[i] > 14
9644 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9647 unsorted_offsets[i] = INTVAL (offset);
9648 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9652 /* Not a suitable memory address. */
9656 /* All the useful information has now been extracted from the
9657 operands into unsorted_regs and unsorted_offsets; additionally,
9658 order[0] has been set to the lowest offset in the list. Sort
9659 the offsets into order, verifying that they are adjacent, and
9660 check that the register numbers are ascending. */
9661 if (!compute_offset_order (nops, unsorted_offsets, order,
9662 check_regs ? unsorted_regs : NULL))
9666 memcpy (saved_order, order, sizeof order);
9672 for (i = 0; i < nops; i++)
9673 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9675 *load_offset = unsorted_offsets[order[0]];
9679 && !peep2_reg_dead_p (nops, base_reg_rtx))
9682 if (unsorted_offsets[order[0]] == 0)
9683 ldm_case = 1; /* ldmia */
9684 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9685 ldm_case = 2; /* ldmib */
9686 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9687 ldm_case = 3; /* ldmda */
9688 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9689 ldm_case = 4; /* ldmdb */
9690 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9691 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9696 if (!multiple_operation_profitable_p (false, nops,
9698 ? unsorted_offsets[order[0]] : 0))
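/* The ldm_case analysis above maps the first and last sorted offsets
   onto the ldm addressing variants.  A simplified standalone
   restatement (guarded out of the build; the demo_ names are
   hypothetical, the "arm" flag stands in for TARGET_ARM, and the real
   code additionally gates ldmdb on TARGET_32BIT):  */
#if 0
#include <stdio.h>

static const char *
demo_ldm_variant (long first, long last, int arm)
{
  if (first == 0)
    return "ldmia";	/* increment after: base points at first word */
  if (arm && first == 4)
    return "ldmib";	/* increment before: base just below first word */
  if (arm && last == 0)
    return "ldmda";	/* decrement after: base points at last word */
  if (last == -4)
    return "ldmdb";	/* decrement before: base just above last word */
  return "add base, then ldmia";	/* ldm_case 5 above */
}

int
main (void)
{
  printf ("%s\n", demo_ldm_variant (0, 12, 1));	  /* ldmia */
  printf ("%s\n", demo_ldm_variant (-16, -4, 1)); /* ldmdb */
  return 0;
}
#endif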
9704 /* Used to determine in a peephole whether a sequence of store instructions can
9705 be changed into a store-multiple instruction.
9706 NOPS is the number of separate store instructions we are examining.
9707 NOPS_TOTAL is the total number of instructions recognized by the peephole
9708 pattern.
9709 The first NOPS entries in OPERANDS are the source registers, the next
9710 NOPS entries are memory operands. If this function is successful, *BASE is
9711 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9712 to the first memory location's offset from that base register. REGS is an
9713 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9714 likewise filled with the corresponding rtx's.
9715 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9716 numbers to an ascending order of stores.
9717 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9718 from ascending memory locations, and the function verifies that the register
9719 numbers are themselves ascending. If CHECK_REGS is false, the register
9720 numbers are stored in the order they are found in the operands. */
9722 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9723 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9724 HOST_WIDE_INT *load_offset, bool check_regs)
9726 int unsorted_regs[MAX_LDM_STM_OPS];
9727 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9728 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9729 int order[MAX_LDM_STM_OPS];
9731 rtx base_reg_rtx = NULL;
9734 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9735 easily extended if required. */
9736 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9738 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9740 /* Loop over the operands and check that the memory references are
9741 suitable (i.e. immediate offsets from the same base register). At
9742 the same time, extract the target register, and the memory
9743 offsets. */
9744 for (i = 0; i < nops; i++)
9749 /* Convert a subreg of a mem into the mem itself. */
9750 if (GET_CODE (operands[nops + i]) == SUBREG)
9751 operands[nops + i] = alter_subreg (operands + (nops + i));
9753 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9755 /* Don't reorder volatile memory references; it doesn't seem worth
9756 looking for the case where the order is ok anyway. */
9757 if (MEM_VOLATILE_P (operands[nops + i]))
9760 offset = const0_rtx;
9762 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9763 || (GET_CODE (reg) == SUBREG
9764 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9765 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9766 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9768 || (GET_CODE (reg) == SUBREG
9769 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9770 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9773 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9774 ? operands[i] : SUBREG_REG (operands[i]));
9775 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9779 base_reg = REGNO (reg);
9781 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9784 else if (base_reg != (int) REGNO (reg))
9785 /* Not addressed from the same base register. */
9788 /* If it isn't an integer register, then we can't do this. */
9789 if (unsorted_regs[i] < 0
9790 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9791 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9792 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9793 || unsorted_regs[i] > 14)
9796 unsorted_offsets[i] = INTVAL (offset);
9797 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9801 /* Not a suitable memory address. */
9805 /* All the useful information has now been extracted from the
9806 operands into unsorted_regs and unsorted_offsets; additionally,
9807 order[0] has been set to the lowest offset in the list. Sort
9808 the offsets into order, verifying that they are adjacent, and
9809 check that the register numbers are ascending. */
9810 if (!compute_offset_order (nops, unsorted_offsets, order,
9811 check_regs ? unsorted_regs : NULL))
9815 memcpy (saved_order, order, sizeof order);
9821 for (i = 0; i < nops; i++)
9823 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9825 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9828 *load_offset = unsorted_offsets[order[0]];
9832 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9835 if (unsorted_offsets[order[0]] == 0)
9836 stm_case = 1; /* stmia */
9837 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9838 stm_case = 2; /* stmib */
9839 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9840 stm_case = 3; /* stmda */
9841 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9842 stm_case = 4; /* stmdb */
9846 if (!multiple_operation_profitable_p (false, nops, 0))
9852 /* Routines for use in generating RTL. */
9854 /* Generate a load-multiple instruction. COUNT is the number of loads in
9855 the instruction; REGS and MEMS are arrays containing the operands.
9856 BASEREG is the base register to be used in addressing the memory operands.
9857 WBACK_OFFSET is nonzero if the instruction should update the base
9861 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9862 HOST_WIDE_INT wback_offset)
9867 if (!multiple_operation_profitable_p (false, count, 0))
9873 for (i = 0; i < count; i++)
9874 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9876 if (wback_offset != 0)
9877 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9885 result = gen_rtx_PARALLEL (VOIDmode,
9886 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9887 if (wback_offset != 0)
9889 XVECEXP (result, 0, 0)
9890 = gen_rtx_SET (VOIDmode, basereg,
9891 plus_constant (basereg, wback_offset));
9896 for (j = 0; i < count; i++, j++)
9897 XVECEXP (result, 0, i)
9898 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9903 /* Generate a store-multiple instruction. COUNT is the number of stores in
9904 the instruction; REGS and MEMS are arrays containing the operands.
9905 BASEREG is the base register to be used in addressing the memory operands.
9906 WBACK_OFFSET is nonzero if the instruction should update the base
9910 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9911 HOST_WIDE_INT wback_offset)
9916 if (GET_CODE (basereg) == PLUS)
9917 basereg = XEXP (basereg, 0);
9919 if (!multiple_operation_profitable_p (false, count, 0))
9925 for (i = 0; i < count; i++)
9926 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9928 if (wback_offset != 0)
9929 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9937 result = gen_rtx_PARALLEL (VOIDmode,
9938 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9939 if (wback_offset != 0)
9941 XVECEXP (result, 0, 0)
9942 = gen_rtx_SET (VOIDmode, basereg,
9943 plus_constant (basereg, wback_offset));
9948 for (j = 0; i < count; i++, j++)
9949 XVECEXP (result, 0, i)
9950 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9955 /* Generate either a load-multiple or a store-multiple instruction. This
9956 function can be used in situations where we can start with a single MEM
9957 rtx and adjust its address upwards.
9958 COUNT is the number of operations in the instruction, not counting a
9959 possible update of the base register. REGS is an array containing the
9960 register numbers to be loaded or stored.
9961 BASEREG is the base register to be used in addressing the memory operands,
9962 which are constructed from BASEMEM.
9963 WRITE_BACK specifies whether the generated instruction should include an
9964 update of the base register.
9965 OFFSETP is used to pass an offset to and from this function; this offset
9966 is not used when constructing the address (instead BASEMEM should have an
9967 appropriate offset in its address), it is used only for setting
9968 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9971 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9972 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9974 rtx mems[MAX_LDM_STM_OPS];
9975 HOST_WIDE_INT offset = *offsetp;
9978 gcc_assert (count <= MAX_LDM_STM_OPS);
9980 if (GET_CODE (basereg) == PLUS)
9981 basereg = XEXP (basereg, 0);
9983 for (i = 0; i < count; i++)
9985 rtx addr = plus_constant (basereg, i * 4);
9986 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9994 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9995 write_back ? 4 * count : 0);
9997 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9998 write_back ? 4 * count : 0);
10002 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10003 rtx basemem, HOST_WIDE_INT *offsetp)
10005 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10010 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10011 rtx basemem, HOST_WIDE_INT *offsetp)
10013 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10017 /* Called from a peephole2 expander to turn a sequence of loads into an
10018 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10019 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10020 is true if we can reorder the registers because they are used commutatively
10022 Returns true iff we could generate a new instruction. */
10025 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10027 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10028 rtx mems[MAX_LDM_STM_OPS];
10029 int i, j, base_reg;
10031 HOST_WIDE_INT offset;
10032 int write_back = FALSE;
10036 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10037 &base_reg, &offset, !sort_regs);
10043 for (i = 0; i < nops - 1; i++)
10044 for (j = i + 1; j < nops; j++)
10045 if (regs[i] > regs[j])
10051 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10055 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10056 gcc_assert (ldm_case == 1 || ldm_case == 5);
10062 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10063 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10065 if (!TARGET_THUMB1)
10067 base_reg = regs[0];
10068 base_reg_rtx = newbase;
10072 for (i = 0; i < nops; i++)
10074 addr = plus_constant (base_reg_rtx, offset + i * 4);
10075 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10078 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10079 write_back ? offset + i * 4 : 0));
10083 /* Called from a peephole2 expander to turn a sequence of stores into an
10084 STM instruction. OPERANDS are the operands found by the peephole matcher;
10085 NOPS indicates how many separate stores we are trying to combine.
10086 Returns true iff we could generate a new instruction. */
10089 gen_stm_seq (rtx *operands, int nops)
10092 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10093 rtx mems[MAX_LDM_STM_OPS];
10096 HOST_WIDE_INT offset;
10097 int write_back = FALSE;
10100 bool base_reg_dies;
10102 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10103 mem_order, &base_reg, &offset, true);
10108 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10110 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10113 gcc_assert (base_reg_dies);
10119 gcc_assert (base_reg_dies);
10120 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10124 addr = plus_constant (base_reg_rtx, offset);
10126 for (i = 0; i < nops; i++)
10128 addr = plus_constant (base_reg_rtx, offset + i * 4);
10129 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10132 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10133 write_back ? offset + i * 4 : 0));
10137 /* Called from a peephole2 expander to turn a sequence of stores that are
10138 preceded by constant loads into an STM instruction. OPERANDS are the
10139 operands found by the peephole matcher; NOPS indicates how many
10140 separate stores we are trying to combine; there are 2 * NOPS
10141 instructions in the peephole.
10142 Returns true iff we could generate a new instruction. */
10145 gen_const_stm_seq (rtx *operands, int nops)
10147 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10148 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10149 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10150 rtx mems[MAX_LDM_STM_OPS];
10153 HOST_WIDE_INT offset;
10154 int write_back = FALSE;
10157 bool base_reg_dies;
10159 HARD_REG_SET allocated;
10161 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10162 mem_order, &base_reg, &offset, false);
10167 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10169 /* If the same register is used more than once, try to find a free
10170 register. */
10171 CLEAR_HARD_REG_SET (allocated);
10172 for (i = 0; i < nops; i++)
10174 for (j = i + 1; j < nops; j++)
10175 if (regs[i] == regs[j])
10177 rtx t = peep2_find_free_register (0, nops * 2,
10178 TARGET_THUMB1 ? "l" : "r",
10179 SImode, &allocated);
10183 regs[i] = REGNO (t);
10187 /* Compute an ordering that maps the register numbers to an ascending
10188 sequence. */
10189 reg_order[0] = 0;
10190 for (i = 0; i < nops; i++)
10191 if (regs[i] < regs[reg_order[0]])
10192 reg_order[0] = i;
10194 for (i = 1; i < nops; i++)
10196 int this_order = reg_order[i - 1];
10197 for (j = 0; j < nops; j++)
10198 if (regs[j] > regs[reg_order[i - 1]]
10199 && (this_order == reg_order[i - 1]
10200 || regs[j] < regs[this_order]))
10202 reg_order[i] = this_order;
10205 /* Ensure that registers that must be live after the instruction end
10206 up with the correct value. */
10207 for (i = 0; i < nops; i++)
10209 int this_order = reg_order[i];
10210 if ((this_order != mem_order[i]
10211 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10212 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10216 /* Load the constants. */
10217 for (i = 0; i < nops; i++)
10219 rtx op = operands[2 * nops + mem_order[i]];
10220 sorted_regs[i] = regs[reg_order[i]];
10221 emit_move_insn (reg_rtxs[reg_order[i]], op);
10224 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10226 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10229 gcc_assert (base_reg_dies);
10235 gcc_assert (base_reg_dies);
10236 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10240 addr = plus_constant (base_reg_rtx, offset);
10242 for (i = 0; i < nops; i++)
10244 addr = plus_constant (base_reg_rtx, offset + i * 4);
10245 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10248 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10249 write_back ? offset + i * 4 : 0));
10254 arm_gen_movmemqi (rtx *operands)
10256 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10257 HOST_WIDE_INT srcoffset, dstoffset;
10259 rtx src, dst, srcbase, dstbase;
10260 rtx part_bytes_reg = NULL;
10263 if (GET_CODE (operands[2]) != CONST_INT
10264 || GET_CODE (operands[3]) != CONST_INT
10265 || INTVAL (operands[2]) > 64
10266 || INTVAL (operands[3]) & 3)
10269 dstbase = operands[0];
10270 srcbase = operands[1];
10272 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10273 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10275 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10276 out_words_to_go = INTVAL (operands[2]) / 4;
10277 last_bytes = INTVAL (operands[2]) & 3;
10278 dstoffset = srcoffset = 0;
10280 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10281 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10283 for (i = 0; in_words_to_go >= 2; i+=4)
10285 if (in_words_to_go > 4)
10286 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10287 TRUE, srcbase, &srcoffset));
10289 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10290 src, FALSE, srcbase,
10293 if (out_words_to_go)
10295 if (out_words_to_go > 4)
10296 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10297 TRUE, dstbase, &dstoffset));
10298 else if (out_words_to_go != 1)
10299 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10300 out_words_to_go, dst,
10303 dstbase, &dstoffset));
10306 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10307 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10308 if (last_bytes != 0)
10310 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10316 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10317 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10320 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10321 if (out_words_to_go)
10325 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10326 sreg = copy_to_reg (mem);
10328 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10329 emit_move_insn (mem, sreg);
10332 gcc_assert (!in_words_to_go); /* Sanity check */
10335 if (in_words_to_go)
10337 gcc_assert (in_words_to_go > 0);
10339 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10340 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10343 gcc_assert (!last_bytes || part_bytes_reg);
10345 if (BYTES_BIG_ENDIAN && last_bytes)
10347 rtx tmp = gen_reg_rtx (SImode);
10349 /* The bytes we want are in the top end of the word. */
10350 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10351 GEN_INT (8 * (4 - last_bytes))));
10352 part_bytes_reg = tmp;
10356 mem = adjust_automodify_address (dstbase, QImode,
10357 plus_constant (dst, last_bytes - 1),
10358 dstoffset + last_bytes - 1);
10359 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10363 tmp = gen_reg_rtx (SImode);
10364 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10365 part_bytes_reg = tmp;
10372 if (last_bytes > 1)
10374 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10375 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10379 rtx tmp = gen_reg_rtx (SImode);
10380 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10381 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10382 part_bytes_reg = tmp;
10389 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10390 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
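/* The big-endian trailing-byte handling above in standalone form: the
   LAST_BYTES still to be stored sit in the top of the 32-bit word, so
   shifting right by 8 * (4 - LAST_BYTES) lines the last of them up with
   the low byte, and each further shift of 8 exposes the next one.
   (Guarded out of the build; a plain C stand-in for the RTL above.)  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t word = 0xAABBCCDDu;	/* big-endian memory order AA BB CC DD */
  int last_bytes = 3;		/* want to store AA BB CC */
  uint32_t v = word >> (8 * (4 - last_bytes));

  assert ((uint8_t) v == 0xCC);	/* stored at offset last_bytes - 1 */
  v >>= 8;
  assert ((uint8_t) v == 0xBB);	/* stored at offset 1 */
  v >>= 8;
  assert ((uint8_t) v == 0xAA);	/* stored at offset 0 */
  return 0;
}
#endif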
10397 /* Select a dominance comparison mode if possible for a test of the general
10398 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10399 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10400 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10401 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10402 In all cases OP will be either EQ or NE, but we don't need to know which
10403 here. If we are unable to support a dominance comparison we return
10404 CC mode. This will then fail to match for the RTL expressions that
10405 generate this call. */
10407 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10409 enum rtx_code cond1, cond2;
10412 /* Currently we will probably get the wrong result if the individual
10413 comparisons are not simple. This also ensures that it is safe to
10414 reverse a comparison if necessary. */
10415 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10417 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10421 /* The if_then_else variant of this tests the second condition if the
10422 first passes, but is true if the first fails. Reverse the first
10423 condition to get a true "inclusive-or" expression. */
10424 if (cond_or == DOM_CC_NX_OR_Y)
10425 cond1 = reverse_condition (cond1);
10427 /* If the comparisons are not equal, and one doesn't dominate the other,
10428 then we can't do this. */
10430 && !comparison_dominates_p (cond1, cond2)
10431 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10436 enum rtx_code temp = cond1;
10444 if (cond_or == DOM_CC_X_AND_Y)
10449 case EQ: return CC_DEQmode;
10450 case LE: return CC_DLEmode;
10451 case LEU: return CC_DLEUmode;
10452 case GE: return CC_DGEmode;
10453 case GEU: return CC_DGEUmode;
10454 default: gcc_unreachable ();
10458 if (cond_or == DOM_CC_X_AND_Y)
10470 gcc_unreachable ();
10474 if (cond_or == DOM_CC_X_AND_Y)
10486 gcc_unreachable ();
10490 if (cond_or == DOM_CC_X_AND_Y)
10491 return CC_DLTUmode;
10496 return CC_DLTUmode;
10498 return CC_DLEUmode;
10502 gcc_unreachable ();
10506 if (cond_or == DOM_CC_X_AND_Y)
10507 return CC_DGTUmode;
10512 return CC_DGTUmode;
10514 return CC_DGEUmode;
10518 gcc_unreachable ();
10521 /* The remaining cases only occur when both comparisons are the
10522 same. */
10524 gcc_assert (cond1 == cond2);
10528 gcc_assert (cond1 == cond2);
10532 gcc_assert (cond1 == cond2);
10536 gcc_assert (cond1 == cond2);
10537 return CC_DLEUmode;
10540 gcc_assert (cond1 == cond2);
10541 return CC_DGEUmode;
10544 gcc_unreachable ();
10549 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10551 /* All floating point compares return CCFP if it is an equality
10552 comparison, and CCFPE otherwise. */
10553 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10573 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10578 gcc_unreachable ();
10582 /* A compare with a shifted operand. Because of canonicalization, the
10583 comparison will have to be swapped when we emit the assembler. */
10584 if (GET_MODE (y) == SImode
10585 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10586 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10587 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10588 || GET_CODE (x) == ROTATERT))
10591 /* This operation is performed swapped, but since we only rely on the Z
10592 flag we don't need an additional mode. */
10593 if (GET_MODE (y) == SImode
10594 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10595 && GET_CODE (x) == NEG
10596 && (op == EQ || op == NE))
10599 /* This is a special case that is used by combine to allow a
10600 comparison of a shifted byte load to be split into a zero-extend
10601 followed by a comparison of the shifted integer (only valid for
10602 equalities and unsigned inequalities). */
10603 if (GET_MODE (x) == SImode
10604 && GET_CODE (x) == ASHIFT
10605 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10606 && GET_CODE (XEXP (x, 0)) == SUBREG
10607 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10608 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10609 && (op == EQ || op == NE
10610 || op == GEU || op == GTU || op == LTU || op == LEU)
10611 && GET_CODE (y) == CONST_INT)
10614 /* A construct for a conditional compare, if the false arm contains
10615 0, then both conditions must be true, otherwise either condition
10616 must be true. Not all conditions are possible, so CCmode is
10617 returned if it can't be done. */
10618 if (GET_CODE (x) == IF_THEN_ELSE
10619 && (XEXP (x, 2) == const0_rtx
10620 || XEXP (x, 2) == const1_rtx)
10621 && COMPARISON_P (XEXP (x, 0))
10622 && COMPARISON_P (XEXP (x, 1)))
10623 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10624 INTVAL (XEXP (x, 2)));
10626 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10627 if (GET_CODE (x) == AND
10628 && (op == EQ || op == NE)
10629 && COMPARISON_P (XEXP (x, 0))
10630 && COMPARISON_P (XEXP (x, 1)))
10631 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10634 if (GET_CODE (x) == IOR
10635 && (op == EQ || op == NE)
10636 && COMPARISON_P (XEXP (x, 0))
10637 && COMPARISON_P (XEXP (x, 1)))
10638 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10641 /* An operation (on Thumb) where we want to test for a single bit.
10642 This is done by shifting that bit up into the top bit of a
10643 scratch register; we can then branch on the sign bit. */
10645 && GET_MODE (x) == SImode
10646 && (op == EQ || op == NE)
10647 && GET_CODE (x) == ZERO_EXTRACT
10648 && XEXP (x, 1) == const1_rtx)
10651 /* An operation that sets the condition codes as a side-effect, the
10652 V flag is not set correctly, so we can only use comparisons where
10653 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10655 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10656 if (GET_MODE (x) == SImode
10658 && (op == EQ || op == NE || op == LT || op == GE)
10659 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10660 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10661 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10662 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10663 || GET_CODE (x) == LSHIFTRT
10664 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10665 || GET_CODE (x) == ROTATERT
10666 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10667 return CC_NOOVmode;
10669 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10672 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10673 && GET_CODE (x) == PLUS
10674 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10677 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10679 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10680 available. */
10681 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10688 /* A DImode comparison against zero can be implemented by
10689 or'ing the two halves together. */
10690 if (y == const0_rtx)
10693 /* We can do an equality test in three Thumb instructions. */
10703 /* DImode unsigned comparisons can be implemented by cmp +
10704 cmpeq without a scratch register. Not worth doing in
10705 Thumb-2. */
10715 /* DImode signed and unsigned comparisons can be implemented
10716 by cmp + sbcs with a scratch register, but that does not
10717 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10718 gcc_assert (op != EQ && op != NE);
10722 gcc_unreachable ();
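/* The identity behind the zero-comparison case above: a DImode value is
   zero exactly when the inclusive-or of its two SImode halves is zero,
   so one flag-setting ORR of the halves serves an EQ/NE test against
   zero.  A standalone check (guarded out of the build; the demo_ name
   is hypothetical):  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
demo_di_eq_zero (uint64_t x)
{
  uint32_t lo = (uint32_t) x;
  uint32_t hi = (uint32_t) (x >> 32);
  return (lo | hi) == 0;	/* mirrors orrs of the two halves */
}

int
main (void)
{
  assert (demo_di_eq_zero (0));
  assert (!demo_di_eq_zero (1ULL << 40)); /* nonzero only in the high half */
  return 0;
}
#endif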
10729 /* X and Y are two things to compare using CODE. Emit the compare insn and
10730 return the rtx for register 0 in the proper mode. FP means this is a
10731 floating point compare: I don't think that it is needed on the arm. */
10733 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10735 enum machine_mode mode;
10737 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10739 /* We might have X as a constant, Y as a register because of the predicates
10740 used for cmpdi. If so, force X to a register here. */
10741 if (dimode_comparison && !REG_P (x))
10742 x = force_reg (DImode, x);
10744 mode = SELECT_CC_MODE (code, x, y);
10745 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10747 if (dimode_comparison
10748 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10749 && mode != CC_CZmode)
10753 /* To compare two non-zero values for equality, XOR them and
10754 then compare against zero. Not used for ARM mode; there
10755 CC_CZmode is cheaper. */
10756 if (mode == CC_Zmode && y != const0_rtx)
10758 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10761 /* A scratch register is required. */
10762 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10763 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10764 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10767 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10772 /* Generate a sequence of insns that will generate the correct return
10773 address mask depending on the physical architecture that the program
10774 is running on. */
10776 arm_gen_return_addr_mask (void)
10778 rtx reg = gen_reg_rtx (Pmode);
10780 emit_insn (gen_return_addr_mask (reg));
10785 arm_reload_in_hi (rtx *operands)
10787 rtx ref = operands[1];
10789 HOST_WIDE_INT offset = 0;
10791 if (GET_CODE (ref) == SUBREG)
10793 offset = SUBREG_BYTE (ref);
10794 ref = SUBREG_REG (ref);
10797 if (GET_CODE (ref) == REG)
10799 /* We have a pseudo which has been spilt onto the stack; there
10800 are two cases here: the first where there is a simple
10801 stack-slot replacement and a second where the stack-slot is
10802 out of range, or is used as a subreg. */
10803 if (reg_equiv_mem (REGNO (ref)))
10805 ref = reg_equiv_mem (REGNO (ref));
10806 base = find_replacement (&XEXP (ref, 0));
10809 /* The slot is out of range, or was dressed up in a SUBREG. */
10810 base = reg_equiv_address (REGNO (ref));
10813 base = find_replacement (&XEXP (ref, 0));
10815 /* Handle the case where the address is too complex to be offset by 1. */
10816 if (GET_CODE (base) == MINUS
10817 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10819 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10821 emit_set_insn (base_plus, base);
10824 else if (GET_CODE (base) == PLUS)
10826 /* The addend must be CONST_INT, or we would have dealt with it above. */
10827 HOST_WIDE_INT hi, lo;
10829 offset += INTVAL (XEXP (base, 1));
10830 base = XEXP (base, 0);
10832 /* Rework the address into a legal sequence of insns. */
10833 /* Valid range for lo is -4095 -> 4095 */
10836 : -((-offset) & 0xfff));
10838 /* Corner case, if lo is the max offset then we would be out of range
10839 once we have added the additional 1 below, so bump the msb into the
10840 pre-loading insn(s). */
10844 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10845 ^ (HOST_WIDE_INT) 0x80000000)
10846 - (HOST_WIDE_INT) 0x80000000);
10848 gcc_assert (hi + lo == offset);
10852 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10854 /* Get the base address; addsi3 knows how to handle constants
10855 that require more than one insn. */
10856 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10862 /* Operands[2] may overlap operands[0] (though it won't overlap
10863 operands[1]), that's why we asked for a DImode reg -- so we can
10864 use the bit that does not overlap. */
10865 if (REGNO (operands[2]) == REGNO (operands[0]))
10866 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10868 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10870 emit_insn (gen_zero_extendqisi2 (scratch,
10871 gen_rtx_MEM (QImode,
10872 plus_constant (base,
10874 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10875 gen_rtx_MEM (QImode,
10876 plus_constant (base,
10878 if (!BYTES_BIG_ENDIAN)
10879 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10880 gen_rtx_IOR (SImode,
10883 gen_rtx_SUBREG (SImode, operands[0], 0),
10887 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10888 gen_rtx_IOR (SImode,
10889 gen_rtx_ASHIFT (SImode, scratch,
10891 gen_rtx_SUBREG (SImode, operands[0], 0)));
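/* The address rework above splits an out-of-range offset into HI + LO,
   where LO fits the +/-4095 byte-load range and HI is added separately;
   the XOR/subtract pair sign-extends bit 31 of the truncated difference
   so that HI + LO == OFFSET also holds for negative offsets.  A
   standalone check (guarded out of the build; the demo_ name is
   hypothetical):  */
#if 0
#include <assert.h>

static void
demo_split (long long offset, long long *hi, long long *lo)
{
  *lo = offset >= 0 ? (offset & 0xfff) : -((-offset) & 0xfff);
  *hi = (((offset - *lo) & 0xffffffffLL) ^ 0x80000000LL) - 0x80000000LL;
}

int
main (void)
{
  long long hi, lo;

  demo_split (0x1234, &hi, &lo);
  assert (lo == 0x234 && hi == 0x1000 && hi + lo == 0x1234);

  demo_split (-0x1234, &hi, &lo);
  assert (hi + lo == -0x1234 && lo >= -4095 && lo <= 4095);
  return 0;
}
#endif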
10894 /* Handle storing a half-word to memory during reload by synthesizing as two
10895 byte stores. Take care not to clobber the input values until after we
10896 have moved them somewhere safe. This code assumes that if the DImode
10897 scratch in operands[2] overlaps either the input value or output address
10898 in some way, then that value must die in this insn (we absolutely need
10899 two scratch registers for some corner cases). */
10901 arm_reload_out_hi (rtx *operands)
10903 rtx ref = operands[0];
10904 rtx outval = operands[1];
10906 HOST_WIDE_INT offset = 0;
10908 if (GET_CODE (ref) == SUBREG)
10910 offset = SUBREG_BYTE (ref);
10911 ref = SUBREG_REG (ref);
10914 if (GET_CODE (ref) == REG)
10916 /* We have a pseudo which has been spilt onto the stack; there
10917 are two cases here: the first where there is a simple
10918 stack-slot replacement and a second where the stack-slot is
10919 out of range, or is used as a subreg. */
10920 if (reg_equiv_mem (REGNO (ref)))
10922 ref = reg_equiv_mem (REGNO (ref));
10923 base = find_replacement (&XEXP (ref, 0));
10926 /* The slot is out of range, or was dressed up in a SUBREG. */
10927 base = reg_equiv_address (REGNO (ref));
10930 base = find_replacement (&XEXP (ref, 0));
10932 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10934 /* Handle the case where the address is too complex to be offset by 1. */
10935 if (GET_CODE (base) == MINUS
10936 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10938 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10940 /* Be careful not to destroy OUTVAL. */
10941 if (reg_overlap_mentioned_p (base_plus, outval))
10943 /* Updating base_plus might destroy outval, see if we can
10944 swap the scratch and base_plus. */
10945 if (!reg_overlap_mentioned_p (scratch, outval))
10948 scratch = base_plus;
10953 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10955 /* Be conservative and copy OUTVAL into the scratch now,
10956 this should only be necessary if outval is a subreg
10957 of something larger than a word. */
10958 /* XXX Might this clobber base? I can't see how it can,
10959 since scratch is known to overlap with OUTVAL, and
10960 must be wider than a word. */
10961 emit_insn (gen_movhi (scratch_hi, outval));
10962 outval = scratch_hi;
10966 emit_set_insn (base_plus, base);
10969 else if (GET_CODE (base) == PLUS)
10971 /* The addend must be CONST_INT, or we would have dealt with it above. */
10972 HOST_WIDE_INT hi, lo;
10974 offset += INTVAL (XEXP (base, 1));
10975 base = XEXP (base, 0);
10977 /* Rework the address into a legal sequence of insns. */
10978 /* Valid range for lo is -4095 -> 4095 */
10981 : -((-offset) & 0xfff));
10983 /* Corner case, if lo is the max offset then we would be out of range
10984 once we have added the additional 1 below, so bump the msb into the
10985 pre-loading insn(s). */
10989 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10990 ^ (HOST_WIDE_INT) 0x80000000)
10991 - (HOST_WIDE_INT) 0x80000000);
10993 gcc_assert (hi + lo == offset);
10997 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10999 /* Be careful not to destroy OUTVAL. */
11000 if (reg_overlap_mentioned_p (base_plus, outval))
11002 /* Updating base_plus might destroy outval, see if we
11003 can swap the scratch and base_plus. */
11004 if (!reg_overlap_mentioned_p (scratch, outval))
11007 scratch = base_plus;
11012 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11014 /* Be conservative and copy outval into scratch now,
11015 this should only be necessary if outval is a
11016 subreg of something larger than a word. */
11017 /* XXX Might this clobber base? I can't see how it
11018 can, since scratch is known to overlap with
11020 emit_insn (gen_movhi (scratch_hi, outval));
11021 outval = scratch_hi;
11025 /* Get the base address; addsi3 knows how to handle constants
11026 that require more than one insn. */
11027 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11033 if (BYTES_BIG_ENDIAN)
11035 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11036 plus_constant (base, offset + 1)),
11037 gen_lowpart (QImode, outval)));
11038 emit_insn (gen_lshrsi3 (scratch,
11039 gen_rtx_SUBREG (SImode, outval, 0),
11041 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11042 gen_lowpart (QImode, scratch)));
11046 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11047 gen_lowpart (QImode, outval)));
11048 emit_insn (gen_lshrsi3 (scratch,
11049 gen_rtx_SUBREG (SImode, outval, 0),
11051 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11052 plus_constant (base, offset + 1)),
11053 gen_lowpart (QImode, scratch)));
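/* The synthesis above in standalone form: a halfword store becomes two
   byte stores whose order is chosen by endianness, with the second byte
   produced by shifting the value right by 8 in a scratch.  (Guarded out
   of the build; the demo_ names are hypothetical.)  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
demo_store_hi (uint8_t *base, unsigned offset, uint16_t outval,
	       int big_endian)
{
  if (big_endian)
    {
      base[offset + 1] = (uint8_t) outval;	  /* low byte, high address */
      base[offset] = (uint8_t) (outval >> 8);	  /* high byte, low address */
    }
  else
    {
      base[offset] = (uint8_t) outval;		  /* low byte, low address */
      base[offset + 1] = (uint8_t) (outval >> 8); /* high byte, high address */
    }
}

int
main (void)
{
  uint8_t buf[2];

  demo_store_hi (buf, 0, 0xBEEF, 1);		  /* big-endian layout */
  assert (buf[0] == 0xBE && buf[1] == 0xEF);
  return 0;
}
#endif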
11057 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11058 (padded to the size of a word) should be passed in a register. */
11061 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11063 if (TARGET_AAPCS_BASED)
11064 return must_pass_in_stack_var_size (mode, type);
11066 return must_pass_in_stack_var_size_or_pad (mode, type);
11070 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11071 Return true if an argument passed on the stack should be padded upwards,
11072 i.e. if the least-significant byte has useful data.
11073 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11074 aggregate types are placed in the lowest memory address. */
11077 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11079 if (!TARGET_AAPCS_BASED)
11080 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11082 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11089 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11090 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11091 byte of the register has useful data, and return the opposite if the
11092 most significant byte does.
11093 For AAPCS, small aggregates and small complex types are always padded upwards. */
11097 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11098 tree type, int first ATTRIBUTE_UNUSED)
11100 if (TARGET_AAPCS_BASED
11101 && BYTES_BIG_ENDIAN
11102 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11103 && int_size_in_bytes (type) <= 4)
11106 /* Otherwise, use default padding. */
11107 return !BYTES_BIG_ENDIAN;
11111 /* Print a symbolic form of X to the debug file, F. */
11113 arm_print_value (FILE *f, rtx x)
11115 switch (GET_CODE (x))
11118 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11122 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11130 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11132 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11133 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11141 fprintf (f, "\"%s\"", XSTR (x, 0));
11145 fprintf (f, "`%s'", XSTR (x, 0));
11149 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11153 arm_print_value (f, XEXP (x, 0));
11157 arm_print_value (f, XEXP (x, 0));
11159 arm_print_value (f, XEXP (x, 1));
11167 fprintf (f, "????");
11172 /* Routines for manipulation of the constant pool. */
11174 /* Arm instructions cannot load a large constant directly into a
11175 register; they have to come from a pc relative load. The constant
11176 must therefore be placed in the addressable range of the pc
11177 relative load. Depending on the precise pc relative load
11178 instruction the range is somewhere between 256 bytes and 4k. This
11179 means that we often have to dump a constant inside a function, and
11180 generate code to branch around it.
11182 It is important to minimize this, since the branches will slow
11183 things down and make the code larger.
11185 Normally we can hide the table after an existing unconditional
11186 branch so that there is no interruption of the flow, but in the
11187 worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...
11205 We fix this by performing a scan after scheduling, which notices
11206 which instructions need to have their operands fetched from the
11207 constant table and builds the table.
11209 The algorithm starts by building a table of all the constants that
11210 need fixing up and all the natural barriers in the function (places
11211 where a constant table can be dropped without breaking the flow).
11212 For each fixup we note how far the pc-relative replacement will be
11213 able to reach and the offset of the instruction into the function.
11215 Having built the table we then group the fixes together to form
11216 tables that are as large as possible (subject to addressing
11217 constraints) and emit each table of constants after the last
11218 barrier that is within range of all the instructions in the group.
11219 If a group does not contain a barrier, then we forcibly create one
11220 by inserting a jump instruction into the flow. Once the table has
11221 been inserted, the insns are then modified to reference the
11222 relevant entry in the pool.
11224 Possible enhancements to the algorithm (not implemented) are:
11226 1) For some processors and object formats, there may be benefit in
11227 aligning the pools to the start of cache lines; this alignment
11228 would need to be taken into account when calculating addressability of a pool. */
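/* Illustrative sketch (not part of the compiler): a toy model of the
   grouping pass described above.  Fixes and barriers are reduced to bare
   addresses; the real pass also handles pool padding, alignment and
   backward references.  All names here are invented for the example.  */
#if 0
#include <limits.h>
#include <stdio.h>

struct toy_fix { long address; long max_reach; int is_barrier; };

static void
toy_group_fixes (struct toy_fix *fixes, int n)
{
  int start = 0;
  while (start < n)
    {
      int i, last_barrier = -1;
      /* The pool must be emitted before the most constrained fix in
	 the current group runs out of range.  */
      long limit = LONG_MAX;

      for (i = start; i < n && fixes[i].address < limit; i++)
	{
	  if (fixes[i].is_barrier)
	    last_barrier = i;
	  else if (fixes[i].address + fixes[i].max_reach < limit)
	    limit = fixes[i].address + fixes[i].max_reach;
	}

      if (last_barrier >= 0)
	{
	  /* Drop back to the barrier; later fixes go in the next pool.  */
	  printf ("dump pool after barrier %d\n", last_barrier);
	  start = last_barrier + 1;
	}
      else
	{
	  /* The real pass forces a barrier here by inserting a jump
	     around the new pool (see create_fix_barrier below).  */
	  printf ("force a barrier before fix %d and dump pool\n", i);
	  start = i;
	}
    }
}

int
main (void)
{
  struct toy_fix f[] = {
    { 0, 4096, 0 }, { 100, 4096, 0 }, { 200, 0, 1 }, { 5000, 1024, 0 }
  };
  toy_group_fixes (f, 4);
  return 0;
}
#endif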
11231 /* These typedefs are located at the start of this file, so that
11232 they can be used in the prototypes there. This comment is to
11233 remind readers of that fact so that the following structures
11234 can be understood more easily.
11236 typedef struct minipool_node Mnode;
11237 typedef struct minipool_fixup Mfix; */
11239 struct minipool_node
11241 /* Doubly linked chain of entries. */
11244 /* The maximum offset into the code at which this entry can be placed. While
11245 pushing fixes for forward references, all entries are sorted in order
11246 of increasing max_address. */
11247 HOST_WIDE_INT max_address;
11248 /* Similarly for an entry inserted for a backwards ref. */
11249 HOST_WIDE_INT min_address;
11250 /* The number of fixes referencing this entry. This can become zero
11251 if we "unpush" an entry. In this case we ignore the entry when we
11252 come to emit the code. */
11254 /* The offset from the start of the minipool. */
11255 HOST_WIDE_INT offset;
11256 /* The value in the table. */
11258 /* The mode of the value. */
11259 enum machine_mode mode;
11260 /* The size of the value. With iWMMXt enabled
11261 sizes > 4 also imply an alignment of 8 bytes. */
11265 struct minipool_fixup
11269 HOST_WIDE_INT address;
11271 enum machine_mode mode;
11275 HOST_WIDE_INT forwards;
11276 HOST_WIDE_INT backwards;
11279 /* Fixes less than a word need padding out to a word boundary. */
11280 #define MINIPOOL_FIX_SIZE(mode) \
11281 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
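/* Illustrative example (not part of the compiler): the effect of the
   padding rule above, with mode sizes as plain integers and an invented
   macro name.  */
#if 0
#include <assert.h>

#define TOY_FIX_SIZE(size) ((size) >= 4 ? (size) : 4)

int
main (void)
{
  assert (TOY_FIX_SIZE (2) == 4);	/* an HImode fix still takes a word */
  assert (TOY_FIX_SIZE (4) == 4);	/* SImode: unchanged */
  assert (TOY_FIX_SIZE (8) == 8);	/* DImode: no padding needed */
  return 0;
}
#endif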
11283 static Mnode * minipool_vector_head;
11284 static Mnode * minipool_vector_tail;
11285 static rtx minipool_vector_label;
11286 static int minipool_pad;
11288 /* The linked list of all minipool fixes required for this function. */
11289 Mfix * minipool_fix_head;
11290 Mfix * minipool_fix_tail;
11291 /* The fix entry for the current minipool, once it has been placed. */
11292 Mfix * minipool_barrier;
11294 /* Determines if INSN is the start of a jump table. Returns the end
11295 of the TABLE or NULL_RTX. */
11297 is_jump_table (rtx insn)
11301 if (GET_CODE (insn) == JUMP_INSN
11302 && JUMP_LABEL (insn) != NULL
11303 && ((table = next_real_insn (JUMP_LABEL (insn)))
11304 == next_real_insn (insn))
11306 && GET_CODE (table) == JUMP_INSN
11307 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11308 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11314 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11315 #define JUMP_TABLES_IN_TEXT_SECTION 0
11318 static HOST_WIDE_INT
11319 get_jump_table_size (rtx insn)
11321 /* ADDR_VECs only take room if read-only data goes into the text section. */
11323 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11325 rtx body = PATTERN (insn);
11326 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11327 HOST_WIDE_INT size;
11328 HOST_WIDE_INT modesize;
11330 modesize = GET_MODE_SIZE (GET_MODE (body));
11331 size = modesize * XVECLEN (body, elt);
11335 /* Round up size of TBB table to a halfword boundary. */
11336 size = (size + 1) & ~(HOST_WIDE_INT)1;
11339 /* No padding necessary for TBH. */
11342 /* Add two bytes for alignment on Thumb. */
11347 gcc_unreachable ();
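/* Illustrative worked example (not part of the compiler): a Thumb-2 TBB
   table (1-byte entries) with 5 cases takes 5 bytes, rounded up to 6 so
   the code after it stays halfword aligned; with 4-byte ADDR_VEC entries
   the same table would take 20 bytes.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long modesize = 1;			/* QImode entries (TBB) */
  long entries = 5;
  long size = modesize * entries;

  size = (size + 1) & ~(long) 1;	/* halfword rounding, as above */
  printf ("TBB table size = %ld bytes\n", size);	/* prints 6 */
  printf ("ADDR_VEC size = %ld bytes\n", 4L * entries);	/* prints 20 */
  return 0;
}
#endif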
11355 /* Move a minipool fix MP from its current location to before MAX_MP.
11356 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11357 constraints may need updating. */
11359 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11360 HOST_WIDE_INT max_address)
11362 /* The code below assumes these are different. */
11363 gcc_assert (mp != max_mp);
11365 if (max_mp == NULL)
11367 if (max_address < mp->max_address)
11368 mp->max_address = max_address;
11372 if (max_address > max_mp->max_address - mp->fix_size)
11373 mp->max_address = max_mp->max_address - mp->fix_size;
11375 mp->max_address = max_address;
11377 /* Unlink MP from its current position. Since max_mp is non-null,
11378 mp->prev must be non-null. */
11379 mp->prev->next = mp->next;
11380 if (mp->next != NULL)
11381 mp->next->prev = mp->prev;
11383 minipool_vector_tail = mp->prev;
11385 /* Re-insert it before MAX_MP. */
11387 mp->prev = max_mp->prev;
11390 if (mp->prev != NULL)
11391 mp->prev->next = mp;
11393 minipool_vector_head = mp;
11396 /* Save the new entry. */
11399 /* Scan over the preceding entries and adjust their addresses as required. */
11401 while (mp->prev != NULL
11402 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11404 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11411 /* Add a constant to the minipool for a forward reference. Returns the
11412 node added or NULL if the constant will not fit in this pool. */
11414 add_minipool_forward_ref (Mfix *fix)
11416 /* If set, max_mp is the first pool_entry that has a lower
11417 constraint than the one we are trying to add. */
11418 Mnode * max_mp = NULL;
11419 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11422 /* If the minipool starts before the end of FIX->INSN then this FIX
11423 cannot be placed into the current pool. Furthermore, adding the
11424 new constant pool entry may cause the pool to start FIX_SIZE bytes earlier. */
11426 if (minipool_vector_head &&
11427 (fix->address + get_attr_length (fix->insn)
11428 >= minipool_vector_head->max_address - fix->fix_size))
11431 /* Scan the pool to see if a constant with the same value has
11432 already been added. While we are doing this, also note the
11434 location where we must insert the constant if it doesn't already exist. */
11435 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11437 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11438 && fix->mode == mp->mode
11439 && (GET_CODE (fix->value) != CODE_LABEL
11440 || (CODE_LABEL_NUMBER (fix->value)
11441 == CODE_LABEL_NUMBER (mp->value)))
11442 && rtx_equal_p (fix->value, mp->value))
11444 /* More than one fix references this entry. */
11446 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11449 /* Note the insertion point if necessary. */
11451 && mp->max_address > max_address)
11454 /* If we are inserting an 8-byte-aligned quantity and
11455 we have not already found an insertion point, then
11456 make sure that all such 8-byte aligned quantities are
11457 placed at the start of the pool. */
11458 if (ARM_DOUBLEWORD_ALIGN
11460 && fix->fix_size >= 8
11461 && mp->fix_size < 8)
11464 max_address = mp->max_address;
11468 /* The value is not currently in the minipool, so we need to create
11469 a new entry for it. If MAX_MP is NULL, the entry will be put on
11470 the end of the list since the placement is less constrained than
11471 any existing entry. Otherwise, we insert the new fix before
11472 MAX_MP and, if necessary, adjust the constraints on the other entries. */
11475 mp->fix_size = fix->fix_size;
11476 mp->mode = fix->mode;
11477 mp->value = fix->value;
11479 /* Not yet required for a backwards ref. */
11480 mp->min_address = -65536;
11482 if (max_mp == NULL)
11484 mp->max_address = max_address;
11486 mp->prev = minipool_vector_tail;
11488 if (mp->prev == NULL)
11490 minipool_vector_head = mp;
11491 minipool_vector_label = gen_label_rtx ();
11494 mp->prev->next = mp;
11496 minipool_vector_tail = mp;
11500 if (max_address > max_mp->max_address - mp->fix_size)
11501 mp->max_address = max_mp->max_address - mp->fix_size;
11503 mp->max_address = max_address;
11506 mp->prev = max_mp->prev;
11508 if (mp->prev != NULL)
11509 mp->prev->next = mp;
11511 minipool_vector_head = mp;
11514 /* Save the new entry. */
11517 /* Scan over the preceding entries and adjust their addresses as required. */
11519 while (mp->prev != NULL
11520 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11522 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11530 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11531 HOST_WIDE_INT min_address)
11533 HOST_WIDE_INT offset;
11535 /* The code below assumes these are different. */
11536 gcc_assert (mp != min_mp);
11538 if (min_mp == NULL)
11540 if (min_address > mp->min_address)
11541 mp->min_address = min_address;
11545 /* We will adjust this below if it is too loose. */
11546 mp->min_address = min_address;
11548 /* Unlink MP from its current position. Since min_mp is non-null,
11549 mp->next must be non-null. */
11550 mp->next->prev = mp->prev;
11551 if (mp->prev != NULL)
11552 mp->prev->next = mp->next;
11554 minipool_vector_head = mp->next;
11556 /* Reinsert it after MIN_MP. */
11558 mp->next = min_mp->next;
11560 if (mp->next != NULL)
11561 mp->next->prev = mp;
11563 minipool_vector_tail = mp;
11569 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11571 mp->offset = offset;
11572 if (mp->refcount > 0)
11573 offset += mp->fix_size;
11575 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11576 mp->next->min_address = mp->min_address + mp->fix_size;
11582 /* Add a constant to the minipool for a backward reference. Returns the
11583 node added or NULL if the constant will not fit in this pool.
11585 Note that the code for insertion for a backwards reference can be
11586 somewhat confusing because the calculated offsets for each fix do
11587 not take into account the size of the pool (which is still under construction). */
11590 add_minipool_backward_ref (Mfix *fix)
11592 /* If set, min_mp is the last pool_entry that has a lower constraint
11593 than the one we are trying to add. */
11594 Mnode *min_mp = NULL;
11595 /* This can be negative, since it is only a constraint. */
11596 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11599 /* If we can't reach the current pool from this insn, or if we can't
11600 insert this entry at the end of the pool without pushing other
11601 fixes out of range, then we don't try. This ensures that we
11602 can't fail later on. */
11603 if (min_address >= minipool_barrier->address
11604 || (minipool_vector_tail->min_address + fix->fix_size
11605 >= minipool_barrier->address))
11608 /* Scan the pool to see if a constant with the same value has
11609 already been added. While we are doing this, also note the
11610 location where we must insert the constant if it doesn't already exist. */
11612 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11614 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11615 && fix->mode == mp->mode
11616 && (GET_CODE (fix->value) != CODE_LABEL
11617 || (CODE_LABEL_NUMBER (fix->value)
11618 == CODE_LABEL_NUMBER (mp->value)))
11619 && rtx_equal_p (fix->value, mp->value)
11620 /* Check that there is enough slack to move this entry to the
11621 end of the table (this is conservative). */
11622 && (mp->max_address
11623 > (minipool_barrier->address
11624 + minipool_vector_tail->offset
11625 + minipool_vector_tail->fix_size)))
11628 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11631 if (min_mp != NULL)
11632 mp->min_address += fix->fix_size;
11635 /* Note the insertion point if necessary. */
11636 if (mp->min_address < min_address)
11638 /* For now, we do not allow the insertion of nodes requiring 8-byte
11639 alignment anywhere except at the start of the pool. */
11640 if (ARM_DOUBLEWORD_ALIGN
11641 && fix->fix_size >= 8 && mp->fix_size < 8)
11646 else if (mp->max_address
11647 < minipool_barrier->address + mp->offset + fix->fix_size)
11649 /* Inserting before this entry would push the fix beyond
11650 its maximum address (which can happen if we have
11651 re-located a forwards fix); force the new fix to come after it. */
11653 if (ARM_DOUBLEWORD_ALIGN
11654 && fix->fix_size >= 8 && mp->fix_size < 8)
11659 min_address = mp->min_address + fix->fix_size;
11662 /* Do not insert a non-8-byte aligned quantity before 8-byte
11663 aligned quantities. */
11664 else if (ARM_DOUBLEWORD_ALIGN
11665 && fix->fix_size < 8
11666 && mp->fix_size >= 8)
11669 min_address = mp->min_address + fix->fix_size;
11674 /* We need to create a new entry. */
11676 mp->fix_size = fix->fix_size;
11677 mp->mode = fix->mode;
11678 mp->value = fix->value;
11680 mp->max_address = minipool_barrier->address + 65536;
11682 mp->min_address = min_address;
11684 if (min_mp == NULL)
11687 mp->next = minipool_vector_head;
11689 if (mp->next == NULL)
11691 minipool_vector_tail = mp;
11692 minipool_vector_label = gen_label_rtx ();
11695 mp->next->prev = mp;
11697 minipool_vector_head = mp;
11701 mp->next = min_mp->next;
11705 if (mp->next != NULL)
11706 mp->next->prev = mp;
11708 minipool_vector_tail = mp;
11711 /* Save the new entry. */
11719 /* Scan over the following entries and adjust their offsets. */
11720 while (mp->next != NULL)
11722 if (mp->next->min_address < mp->min_address + mp->fix_size)
11723 mp->next->min_address = mp->min_address + mp->fix_size;
11726 mp->next->offset = mp->offset + mp->fix_size;
11728 mp->next->offset = mp->offset;
11737 assign_minipool_offsets (Mfix *barrier)
11739 HOST_WIDE_INT offset = 0;
11742 minipool_barrier = barrier;
11744 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11746 mp->offset = offset;
11748 if (mp->refcount > 0)
11749 offset += mp->fix_size;
11753 /* Output the literal table. */
11755 dump_minipool (rtx scan)
11761 if (ARM_DOUBLEWORD_ALIGN)
11762 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11763 if (mp->refcount > 0 && mp->fix_size >= 8)
11770 fprintf (dump_file,
11771 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11772 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11774 scan = emit_label_after (gen_label_rtx (), scan);
11775 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11776 scan = emit_label_after (minipool_vector_label, scan);
11778 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11780 if (mp->refcount > 0)
11784 fprintf (dump_file,
11785 ";; Offset %u, min %ld, max %ld ",
11786 (unsigned) mp->offset, (unsigned long) mp->min_address,
11787 (unsigned long) mp->max_address);
11788 arm_print_value (dump_file, mp->value);
11789 fputc ('\n', dump_file);
11792 switch (mp->fix_size)
11794 #ifdef HAVE_consttable_1
11796 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11800 #ifdef HAVE_consttable_2
11802 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11806 #ifdef HAVE_consttable_4
11808 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11812 #ifdef HAVE_consttable_8
11814 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11818 #ifdef HAVE_consttable_16
11820 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11825 gcc_unreachable ();
11833 minipool_vector_head = minipool_vector_tail = NULL;
11834 scan = emit_insn_after (gen_consttable_end (), scan);
11835 scan = emit_barrier_after (scan);
11838 /* Return the cost of forcibly inserting a barrier after INSN. */
11840 arm_barrier_cost (rtx insn)
11842 /* Basing the location of the pool on the loop depth is preferable,
11843 but at the moment, the basic block information seems to be
11844 corrupted by this stage of the compilation. */
11845 int base_cost = 50;
11846 rtx next = next_nonnote_insn (insn);
11848 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11851 switch (GET_CODE (insn))
11854 /* It will always be better to place the table before the label, rather than after it. */
11863 return base_cost - 10;
11866 return base_cost + 10;
11870 /* Find the best place in the insn stream in the range
11871 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11872 Create the barrier by inserting a jump and add a new fix entry for it. */
11875 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11877 HOST_WIDE_INT count = 0;
11879 rtx from = fix->insn;
11880 /* The instruction after which we will insert the jump. */
11881 rtx selected = NULL;
11883 /* The address at which the jump instruction will be placed. */
11884 HOST_WIDE_INT selected_address;
11886 HOST_WIDE_INT max_count = max_address - fix->address;
11887 rtx label = gen_label_rtx ();
11889 selected_cost = arm_barrier_cost (from);
11890 selected_address = fix->address;
11892 while (from && count < max_count)
11897 /* This code shouldn't have been called if there was a natural barrier within range. */
11899 gcc_assert (GET_CODE (from) != BARRIER);
11901 /* Count the length of this insn. */
11902 count += get_attr_length (from);
11904 /* If there is a jump table, add its length. */
11905 tmp = is_jump_table (from);
11908 count += get_jump_table_size (tmp);
11910 /* Jump tables aren't in a basic block, so base the cost on
11911 the dispatch insn. If we select this location, we will
11912 still put the pool after the table. */
11913 new_cost = arm_barrier_cost (from);
11915 if (count < max_count
11916 && (!selected || new_cost <= selected_cost))
11919 selected_cost = new_cost;
11920 selected_address = fix->address + count;
11923 /* Continue after the dispatch table. */
11924 from = NEXT_INSN (tmp);
11928 new_cost = arm_barrier_cost (from);
11930 if (count < max_count
11931 && (!selected || new_cost <= selected_cost))
11934 selected_cost = new_cost;
11935 selected_address = fix->address + count;
11938 from = NEXT_INSN (from);
11941 /* Make sure that we found a place to insert the jump. */
11942 gcc_assert (selected);
11944 /* Make sure we do not split a call and its corresponding
11945 CALL_ARG_LOCATION note. */
11946 if (CALL_P (selected))
11948 rtx next = NEXT_INSN (selected);
11949 if (next && NOTE_P (next)
11950 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
11954 /* Create a new JUMP_INSN that branches around a barrier. */
11955 from = emit_jump_insn_after (gen_jump (label), selected);
11956 JUMP_LABEL (from) = label;
11957 barrier = emit_barrier_after (from);
11958 emit_label_after (label, barrier);
11960 /* Create a minipool barrier entry for the new barrier. */
11961 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11962 new_fix->insn = barrier;
11963 new_fix->address = selected_address;
11964 new_fix->next = fix->next;
11965 fix->next = new_fix;
11970 /* Record that there is a natural barrier in the insn stream at ADDRESS. */
11973 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11975 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11978 fix->address = address;
11981 if (minipool_fix_head != NULL)
11982 minipool_fix_tail->next = fix;
11984 minipool_fix_head = fix;
11986 minipool_fix_tail = fix;
11989 /* Record INSN, which will need fixing up to load a value from the
11990 minipool. ADDRESS is the offset of the insn since the start of the
11991 function; LOC is a pointer to the part of the insn which requires
11992 fixing; VALUE is the constant that must be loaded, which is of mode MODE. */
11995 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11996 enum machine_mode mode, rtx value)
11998 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12001 fix->address = address;
12004 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12005 fix->value = value;
12006 fix->forwards = get_attr_pool_range (insn);
12007 fix->backwards = get_attr_neg_pool_range (insn);
12008 fix->minipool = NULL;
12010 /* If an insn doesn't have a range defined for it, then it isn't
12011 expecting to be reworked by this code. Better to stop now than
12012 to generate duff assembly code. */
12013 gcc_assert (fix->forwards || fix->backwards);
12015 /* If an entry requires 8-byte alignment then assume all constant pools
12016 require 4 bytes of padding. Trying to do this later on a per-pool
12017 basis is awkward because existing pool entries have to be modified. */
12018 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12023 fprintf (dump_file,
12024 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12025 GET_MODE_NAME (mode),
12026 INSN_UID (insn), (unsigned long) address,
12027 -1 * (long)fix->backwards, (long)fix->forwards);
12028 arm_print_value (dump_file, fix->value);
12029 fprintf (dump_file, "\n");
12032 /* Add it to the chain of fixes. */
12035 if (minipool_fix_head != NULL)
12036 minipool_fix_tail->next = fix;
12038 minipool_fix_head = fix;
12040 minipool_fix_tail = fix;
12043 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12044 Returns the number of insns needed, or 99 if we don't know how to do it. */
12047 arm_const_double_inline_cost (rtx val)
12049 rtx lowpart, highpart;
12050 enum machine_mode mode;
12052 mode = GET_MODE (val);
12054 if (mode == VOIDmode)
12057 gcc_assert (GET_MODE_SIZE (mode) == 8);
12059 lowpart = gen_lowpart (SImode, val);
12060 highpart = gen_highpart_mode (SImode, mode, val);
12062 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12063 gcc_assert (GET_CODE (highpart) == CONST_INT);
12065 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12066 NULL_RTX, NULL_RTX, 0, 0)
12067 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12068 NULL_RTX, NULL_RTX, 0, 0));
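/* Illustrative sketch (not part of the compiler): why a 64-bit constant
   is costed as two independent 32-bit halves.  This is a toy version of
   the const_ok_for_arm test -- an 8-bit value rotated right by an even
   amount -- applied to each half; the function name is invented.  */
#if 0
#include <stdio.h>

static int
toy_ok_for_arm (unsigned int x)
{
  int rot;
  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotating left by ROT undoes a rotate-right-by-ROT encoding.  */
      unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}

int
main (void)
{
  unsigned long long val = 0x123000000ff00ULL;
  /* Low half 0x0000ff00 encodes (0xff rotated); high half 0x00012300
     spans 9 significant bits and does not.  */
  printf ("low half ok: %d\n", toy_ok_for_arm ((unsigned int) val));
  printf ("high half ok: %d\n", toy_ok_for_arm ((unsigned int) (val >> 32)));
  return 0;
}
#endif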
12071 /* Return true if it is worthwhile to split a 64-bit constant into two
12072 32-bit operations. This is the case if optimizing for size, or
12073 if we have load delay slots, or if one 32-bit part can be done with
12074 a single data operation. */
12076 arm_const_double_by_parts (rtx val)
12078 enum machine_mode mode = GET_MODE (val);
12081 if (optimize_size || arm_ld_sched)
12084 if (mode == VOIDmode)
12087 part = gen_highpart_mode (SImode, mode, val);
12089 gcc_assert (GET_CODE (part) == CONST_INT);
12091 if (const_ok_for_arm (INTVAL (part))
12092 || const_ok_for_arm (~INTVAL (part)))
12095 part = gen_lowpart (SImode, val);
12097 gcc_assert (GET_CODE (part) == CONST_INT);
12099 if (const_ok_for_arm (INTVAL (part))
12100 || const_ok_for_arm (~INTVAL (part)))
12106 /* Return true if it is possible to inline both the high and low parts
12107 of a 64-bit constant into 32-bit data processing instructions. */
12109 arm_const_double_by_immediates (rtx val)
12111 enum machine_mode mode = GET_MODE (val);
12114 if (mode == VOIDmode)
12117 part = gen_highpart_mode (SImode, mode, val);
12119 gcc_assert (GET_CODE (part) == CONST_INT);
12121 if (!const_ok_for_arm (INTVAL (part)))
12124 part = gen_lowpart (SImode, val);
12126 gcc_assert (GET_CODE (part) == CONST_INT);
12128 if (!const_ok_for_arm (INTVAL (part)))
12134 /* Scan INSN and note any of its operands that need fixing.
12135 If DO_PUSHES is false we do not actually push any of the fixups
12136 needed. The function returns TRUE if any fixups were needed/pushed.
12137 This is used by arm_memory_load_p() which needs to know about loads
12138 of constants that will be converted into minipool loads. */
12140 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12142 bool result = false;
12145 extract_insn (insn);
12147 if (!constrain_operands (1))
12148 fatal_insn_not_found (insn);
12150 if (recog_data.n_alternatives == 0)
12153 /* Fill in recog_op_alt with information about the constraints of this insn. */
12155 preprocess_constraints ();
12157 for (opno = 0; opno < recog_data.n_operands; opno++)
12159 /* Things we need to fix can only occur in inputs. */
12160 if (recog_data.operand_type[opno] != OP_IN)
12163 /* If this alternative is a memory reference, then any mention
12164 of constants in this alternative is really to fool reload
12165 into allowing us to accept one there. We need to fix them up
12166 now so that we output the right code. */
12167 if (recog_op_alt[opno][which_alternative].memory_ok)
12169 rtx op = recog_data.operand[opno];
12171 if (CONSTANT_P (op))
12174 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12175 recog_data.operand_mode[opno], op);
12178 else if (GET_CODE (op) == MEM
12179 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12180 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12184 rtx cop = avoid_constant_pool_reference (op);
12186 /* Casting the address of something to a mode narrower
12187 than a word can cause avoid_constant_pool_reference()
12188 to return the pool reference itself. That's no good to
12189 us here. Let's just hope that we can use the
12190 constant pool value directly. */
12192 cop = get_pool_constant (XEXP (op, 0));
12194 push_minipool_fix (insn, address,
12195 recog_data.operand_loc[opno],
12196 recog_data.operand_mode[opno], cop);
12207 /* Convert instructions to their cc-clobbering variant if possible, since
12208 that allows us to use smaller encodings. */
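/* Illustrative note (not part of the compiler), in assembly terms: in a
   region where the condition codes are otherwise dead, the 32-bit
   Thumb-2 encoding

	add	r0, r0, r1	@ 4 bytes

   can be replaced by the flag-setting narrow form

	adds	r0, r0, r1	@ 2 bytes

   which is what wrapping the SET in a PARALLEL with a CC clobber, as
   done below, expresses at the RTL level.  */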
12211 thumb2_reorg (void)
12216 INIT_REG_SET (&live);
12218 /* We are freeing block_for_insn in the toplev to keep compatibility
12219 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12220 compute_bb_for_insn ();
12227 COPY_REG_SET (&live, DF_LR_OUT (bb));
12228 df_simulate_initialize_backwards (bb, &live);
12229 FOR_BB_INSNS_REVERSE (bb, insn)
12231 if (NONJUMP_INSN_P (insn)
12232 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12234 rtx pat = PATTERN (insn);
12235 if (GET_CODE (pat) == SET
12236 && low_register_operand (XEXP (pat, 0), SImode)
12237 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12238 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12239 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12241 rtx dst = XEXP (pat, 0);
12242 rtx src = XEXP (pat, 1);
12243 rtx op0 = XEXP (src, 0);
12244 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12245 ? XEXP (src, 1) : NULL);
12247 if (rtx_equal_p (dst, op0)
12248 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12250 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12251 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12252 rtvec vec = gen_rtvec (2, pat, clobber);
12254 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12255 INSN_CODE (insn) = -1;
12257 /* We can also handle a commutative operation where the
12258 second operand matches the destination. */
12259 else if (op1 && rtx_equal_p (dst, op1))
12261 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12262 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12265 src = copy_rtx (src);
12266 XEXP (src, 0) = op1;
12267 XEXP (src, 1) = op0;
12268 pat = gen_rtx_SET (VOIDmode, dst, src);
12269 vec = gen_rtvec (2, pat, clobber);
12270 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12271 INSN_CODE (insn) = -1;
12276 if (NONDEBUG_INSN_P (insn))
12277 df_simulate_one_insn_backwards (bb, insn, &live);
12281 CLEAR_REG_SET (&live);
12284 /* GCC puts the pool in the wrong place for ARM, since we can only
12285 load addresses a limited distance around the pc. We do some
12286 special munging to move the constant pool values to the correct
12287 point in the code. */
12292 HOST_WIDE_INT address = 0;
12298 minipool_fix_head = minipool_fix_tail = NULL;
12300 /* The first insn must always be a note, or the code below won't
12301 scan it properly. */
12302 insn = get_insns ();
12303 gcc_assert (GET_CODE (insn) == NOTE);
12306 /* Scan all the insns and record the operands that will need fixing. */
12307 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12309 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12310 && (arm_cirrus_insn_p (insn)
12311 || GET_CODE (insn) == JUMP_INSN
12312 || arm_memory_load_p (insn)))
12313 cirrus_reorg (insn);
12315 if (GET_CODE (insn) == BARRIER)
12316 push_minipool_barrier (insn, address);
12317 else if (INSN_P (insn))
12321 note_invalid_constants (insn, address, true);
12322 address += get_attr_length (insn);
12324 /* If the insn is a vector jump, add the size of the table
12325 and skip the table. */
12326 if ((table = is_jump_table (insn)) != NULL)
12328 address += get_jump_table_size (table);
12334 fix = minipool_fix_head;
12336 /* Now scan the fixups and perform the required changes. */
12341 Mfix * last_added_fix;
12342 Mfix * last_barrier = NULL;
12345 /* Skip any further barriers before the next fix. */
12346 while (fix && GET_CODE (fix->insn) == BARRIER)
12349 /* No more fixes. */
12353 last_added_fix = NULL;
12355 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12357 if (GET_CODE (ftmp->insn) == BARRIER)
12359 if (ftmp->address >= minipool_vector_head->max_address)
12362 last_barrier = ftmp;
12364 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12367 last_added_fix = ftmp; /* Keep track of the last fix added. */
12370 /* If we found a barrier, drop back to that; any fixes that we
12371 could have reached but come after the barrier will now go in
12372 the next mini-pool. */
12373 if (last_barrier != NULL)
12375 /* Reduce the refcount for those fixes that won't go into this pool. */
12377 for (fdel = last_barrier->next;
12378 fdel && fdel != ftmp;
12381 fdel->minipool->refcount--;
12382 fdel->minipool = NULL;
12385 ftmp = last_barrier;
12389 /* ftmp is the first fix that we can't fit into this pool and
12390 there are no natural barriers that we could use. Insert a
12391 new barrier in the code somewhere between the previous
12392 fix and this one, and arrange to jump around it. */
12393 HOST_WIDE_INT max_address;
12395 /* The last item on the list of fixes must be a barrier, so
12396 we can never run off the end of the list of fixes without
12397 last_barrier being set. */
12400 max_address = minipool_vector_head->max_address;
12401 /* Check that there isn't another fix that is in range that
12402 we couldn't fit into this pool because the pool was
12403 already too large: we need to put the pool before such an
12404 instruction. The pool itself may come just after the
12405 fix because create_fix_barrier also allows space for a
12406 jump instruction. */
12407 if (ftmp->address < max_address)
12408 max_address = ftmp->address + 1;
12410 last_barrier = create_fix_barrier (last_added_fix, max_address);
12413 assign_minipool_offsets (last_barrier);
12417 if (GET_CODE (ftmp->insn) != BARRIER
12418 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12425 /* Scan over the fixes we have identified for this pool, fixing them
12426 up and adding the constants to the pool itself. */
12427 for (this_fix = fix; this_fix && ftmp != this_fix;
12428 this_fix = this_fix->next)
12429 if (GET_CODE (this_fix->insn) != BARRIER)
12432 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12433 minipool_vector_label),
12434 this_fix->minipool->offset);
12435 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12438 dump_minipool (last_barrier->insn);
12442 /* From now on we must synthesize any constants that we can't handle
12443 directly. This can happen if the RTL gets split during final
12444 instruction generation. */
12445 after_arm_reorg = 1;
12447 /* Free the minipool memory. */
12448 obstack_free (&minipool_obstack, minipool_startobj);
12451 /* Routines to output assembly language. */
12453 /* If the rtx is the correct value then return the string of the number.
12454 In this way we can ensure that valid double constants are generated even
12455 when cross compiling. */
12457 fp_immediate_constant (rtx x)
12462 if (!fp_consts_inited)
12465 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12466 for (i = 0; i < 8; i++)
12467 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12468 return strings_fp[i];
12470 gcc_unreachable ();
12473 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12474 static const char *
12475 fp_const_from_val (REAL_VALUE_TYPE *r)
12479 if (!fp_consts_inited)
12482 for (i = 0; i < 8; i++)
12483 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12484 return strings_fp[i];
12486 gcc_unreachable ();
12489 /* Output the operands of a LDM/STM instruction to STREAM.
12490 MASK is the ARM register set mask of which only bits 0-15 are important.
12491 REG is the base register, either the frame pointer or the stack pointer,
12492 INSTR is the possibly suffixed load or store instruction.
12493 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12496 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12497 unsigned long mask, int rfe)
12500 bool not_first = FALSE;
12502 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12503 fputc ('\t', stream);
12504 asm_fprintf (stream, instr, reg);
12505 fputc ('{', stream);
12507 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12508 if (mask & (1 << i))
12511 fprintf (stream, ", ");
12513 asm_fprintf (stream, "%r", i);
12518 fprintf (stream, "}^\n");
12520 fprintf (stream, "}\n");
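/* Illustrative standalone sketch (not part of the compiler) of the
   register-mask printing loop above.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long mask = (1 << 4) | (1 << 5) | (1 << 15);	/* r4, r5, pc */
  int i, not_first = 0;

  printf ("ldmfd\tsp!, {");
  for (i = 0; i <= 15; i++)
    if (mask & (1 << i))
      {
	if (not_first)
	  printf (", ");
	printf ("r%d", i);
	not_first = 1;
      }
  /* Prints "ldmfd sp!, {r4, r5, r15}"; the compiler itself prints "pc"
     for r15 via the %r directive.  */
  printf ("}\n");
  return 0;
}
#endif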
12524 /* Output a FLDMD instruction to STREAM.
12525 BASE is the register containing the address.
12526 REG and COUNT specify the register range.
12527 Extra registers may be added to avoid hardware bugs.
12529 We output FLDMD even for ARMv5 VFP implementations. Although
12530 FLDMD is technically not supported until ARMv6, it is believed
12531 that all VFP implementations support its use in this context. */
12534 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12538 /* Workaround ARM10 VFPr1 bug. */
12539 if (count == 2 && !arm_arch6)
12546 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12547 load into multiple parts if we have to handle more than 16 registers. */
12550 vfp_output_fldmd (stream, base, reg, 16);
12551 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12555 fputc ('\t', stream);
12556 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12558 for (i = reg; i < reg + count; i++)
12561 fputs (", ", stream);
12562 asm_fprintf (stream, "d%d", i);
12564 fputs ("}\n", stream);
12569 /* Output the assembly for a store multiple. */
12572 vfp_output_fstmd (rtx * operands)
12579 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12580 p = strlen (pattern);
12582 gcc_assert (GET_CODE (operands[1]) == REG);
12584 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12585 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12587 p += sprintf (&pattern[p], ", d%d", base + i);
12589 strcpy (&pattern[p], "}");
12591 output_asm_insn (pattern, operands);
12596 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12597 number of bytes pushed. */
12600 vfp_emit_fstmd (int base_reg, int count)
12607 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12608 register pairs are stored by a store multiple insn. We avoid this
12609 by pushing an extra pair. */
12610 if (count == 2 && !arm_arch6)
12612 if (base_reg == LAST_VFP_REGNUM - 3)
12617 /* FSTMD may not store more than 16 doubleword registers at once. Split
12618 larger stores into multiple parts (up to a maximum of two, in practice). */
12623 /* NOTE: base_reg is an internal register number, so each D register counts as 2. */
12625 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12626 saved += vfp_emit_fstmd (base_reg, 16);
12630 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12631 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12633 reg = gen_rtx_REG (DFmode, base_reg);
12636 XVECEXP (par, 0, 0)
12637 = gen_rtx_SET (VOIDmode,
12640 gen_rtx_PRE_MODIFY (Pmode,
12643 (stack_pointer_rtx,
12646 gen_rtx_UNSPEC (BLKmode,
12647 gen_rtvec (1, reg),
12648 UNSPEC_PUSH_MULT));
12650 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12651 plus_constant (stack_pointer_rtx, -(count * 8)));
12652 RTX_FRAME_RELATED_P (tmp) = 1;
12653 XVECEXP (dwarf, 0, 0) = tmp;
12655 tmp = gen_rtx_SET (VOIDmode,
12656 gen_frame_mem (DFmode, stack_pointer_rtx),
12658 RTX_FRAME_RELATED_P (tmp) = 1;
12659 XVECEXP (dwarf, 0, 1) = tmp;
12661 for (i = 1; i < count; i++)
12663 reg = gen_rtx_REG (DFmode, base_reg);
12665 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12667 tmp = gen_rtx_SET (VOIDmode,
12668 gen_frame_mem (DFmode,
12669 plus_constant (stack_pointer_rtx,
12672 RTX_FRAME_RELATED_P (tmp) = 1;
12673 XVECEXP (dwarf, 0, i + 1) = tmp;
12676 par = emit_insn (par);
12677 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12678 RTX_FRAME_RELATED_P (par) = 1;
12683 /* Emit a call instruction with pattern PAT. ADDR is the address of
12684 the call target. */
12687 arm_emit_call_insn (rtx pat, rtx addr)
12691 insn = emit_call_insn (pat);
12693 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12694 If the call might use such an entry, add a use of the PIC register
12695 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12696 if (TARGET_VXWORKS_RTP
12698 && GET_CODE (addr) == SYMBOL_REF
12699 && (SYMBOL_REF_DECL (addr)
12700 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12701 : !SYMBOL_REF_LOCAL_P (addr)))
12703 require_pic_register ();
12704 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12708 /* Output a 'call' insn. */
12710 output_call (rtx *operands)
12712 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12714 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12715 if (REGNO (operands[0]) == LR_REGNUM)
12717 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12718 output_asm_insn ("mov%?\t%0, %|lr", operands);
12721 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12723 if (TARGET_INTERWORK || arm_arch4t)
12724 output_asm_insn ("bx%?\t%0", operands);
12726 output_asm_insn ("mov%?\t%|pc, %0", operands);
12731 /* Output a 'call' insn that is a reference in memory. This is
12732 disabled for ARMv5 and we prefer a blx instead because otherwise
12733 there's a significant performance overhead. */
12735 output_call_mem (rtx *operands)
12737 gcc_assert (!arm_arch5);
12738 if (TARGET_INTERWORK)
12740 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12741 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12742 output_asm_insn ("bx%?\t%|ip", operands);
12744 else if (regno_use_in (LR_REGNUM, operands[0]))
12746 /* LR is used in the memory address. We load the address in the
12747 first instruction. It's safe to use IP as the target of the
12748 load since the call will kill it anyway. */
12749 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12750 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12752 output_asm_insn ("bx%?\t%|ip", operands);
12754 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12758 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12759 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12766 /* Output a move from arm registers to an fpa register.
12767 OPERANDS[0] is an fpa register.
12768 OPERANDS[1] is the first register of an arm register pair. */
12770 output_mov_long_double_fpa_from_arm (rtx *operands)
12772 int arm_reg0 = REGNO (operands[1]);
12775 gcc_assert (arm_reg0 != IP_REGNUM);
12777 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12778 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12779 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12781 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12782 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12787 /* Output a move from an fpa register to arm registers.
12788 OPERANDS[0] is the first register of an arm register pair.
12789 OPERANDS[1] is an fpa register. */
12791 output_mov_long_double_arm_from_fpa (rtx *operands)
12793 int arm_reg0 = REGNO (operands[0]);
12796 gcc_assert (arm_reg0 != IP_REGNUM);
12798 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12799 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12800 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12802 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12803 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12807 /* Output a move from arm registers to arm registers of a long double.
12808 OPERANDS[0] is the destination.
12809 OPERANDS[1] is the source. */
12811 output_mov_long_double_arm_from_arm (rtx *operands)
12813 /* We have to be careful here because the two might overlap. */
12814 int dest_start = REGNO (operands[0]);
12815 int src_start = REGNO (operands[1]);
12819 if (dest_start < src_start)
12821 for (i = 0; i < 3; i++)
12823 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12824 ops[1] = gen_rtx_REG (SImode, src_start + i);
12825 output_asm_insn ("mov%?\t%0, %1", ops);
12830 for (i = 2; i >= 0; i--)
12832 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12833 ops[1] = gen_rtx_REG (SImode, src_start + i);
12834 output_asm_insn ("mov%?\t%0, %1", ops);
12842 arm_emit_movpair (rtx dest, rtx src)
12844 /* If the src is an immediate, simplify it. */
12845 if (CONST_INT_P (src))
12847 HOST_WIDE_INT val = INTVAL (src);
12848 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12849 if ((val >> 16) & 0x0000ffff)
12850 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12852 GEN_INT ((val >> 16) & 0x0000ffff));
12855 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12856 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
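/* Illustrative sketch (not part of the compiler): for a constant, the
   immediate path above corresponds to a MOVW of the low half followed,
   only when the high half is nonzero, by a MOVT.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int val = 0x12345678;

  printf ("movw\tr0, #0x%04x\n", val & 0xffff);	          /* low 16 bits */
  if ((val >> 16) & 0xffff)
    printf ("movt\tr0, #0x%04x\n", (val >> 16) & 0xffff); /* high 16 bits */
  return 0;
}
#endif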
12859 /* Output a move from arm registers to an fpa register.
12860 OPERANDS[0] is an fpa register.
12861 OPERANDS[1] is the first register of an arm register pair. */
12863 output_mov_double_fpa_from_arm (rtx *operands)
12865 int arm_reg0 = REGNO (operands[1]);
12868 gcc_assert (arm_reg0 != IP_REGNUM);
12870 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12871 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12872 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12873 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12877 /* Output a move from an fpa register to arm registers.
12878 OPERANDS[0] is the first register of an arm register pair.
12879 OPERANDS[1] is an fpa register. */
12881 output_mov_double_arm_from_fpa (rtx *operands)
12883 int arm_reg0 = REGNO (operands[0]);
12886 gcc_assert (arm_reg0 != IP_REGNUM);
12888 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12889 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12890 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12891 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12895 /* Output a move between double words. It must be REG<-MEM or MEM<-REG. */
12898 output_move_double (rtx *operands)
12900 enum rtx_code code0 = GET_CODE (operands[0]);
12901 enum rtx_code code1 = GET_CODE (operands[1]);
12906 unsigned int reg0 = REGNO (operands[0]);
12908 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12910 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12912 switch (GET_CODE (XEXP (operands[1], 0)))
12916 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12917 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12919 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12923 gcc_assert (TARGET_LDRD);
12924 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12929 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12931 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12936 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12938 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12942 gcc_assert (TARGET_LDRD);
12943 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12948 /* Autoincrement addressing modes should never have overlapping
12949 base and destination registers, and overlapping index registers
12950 are already prohibited, so this doesn't need to worry about fix_cm3_ldrd. */
12952 otherops[0] = operands[0];
12953 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12954 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12956 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12958 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12960 /* Registers overlap so split out the increment. */
12961 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12962 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12966 /* Use a single insn if we can.
12967 FIXME: IWMMXT allows offsets larger than ldrd can
12968 handle, fix these up with a pair of ldr. */
12970 || GET_CODE (otherops[2]) != CONST_INT
12971 || (INTVAL (otherops[2]) > -256
12972 && INTVAL (otherops[2]) < 256))
12973 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12976 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12977 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12983 /* Use a single insn if we can.
12984 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12985 fix these up with a pair of ldr. */
12987 || GET_CODE (otherops[2]) != CONST_INT
12988 || (INTVAL (otherops[2]) > -256
12989 && INTVAL (otherops[2]) < 256))
12990 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12993 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12994 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13001 /* We might be able to use ldrd %0, %1 here. However the range is
13002 different to ldr/adr, and it is broken on some ARMv7-M
13003 implementations. */
13004 /* Use the second register of the pair to avoid problematic overlap with the first loaded register. */
13006 otherops[1] = operands[1];
13007 output_asm_insn ("adr%?\t%0, %1", otherops);
13008 operands[1] = otherops[0];
13010 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13012 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13015 /* ??? This needs checking for thumb2. */
13017 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13018 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13020 otherops[0] = operands[0];
13021 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13022 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13024 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13026 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13028 switch ((int) INTVAL (otherops[2]))
13031 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13036 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13041 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13045 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13046 operands[1] = otherops[0];
13048 && (GET_CODE (otherops[2]) == REG
13050 || (GET_CODE (otherops[2]) == CONST_INT
13051 && INTVAL (otherops[2]) > -256
13052 && INTVAL (otherops[2]) < 256)))
13054 if (reg_overlap_mentioned_p (operands[0],
13058 /* Swap base and index registers over to
13059 avoid a conflict. */
13061 otherops[1] = otherops[2];
13064 /* If both registers conflict, it will usually
13065 have been fixed by a splitter. */
13066 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13067 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13069 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13070 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13074 otherops[0] = operands[0];
13075 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13080 if (GET_CODE (otherops[2]) == CONST_INT)
13082 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13083 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13085 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13088 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13091 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13094 return "ldr%(d%)\t%0, [%1]";
13096 return "ldm%(ia%)\t%1, %M0";
13100 otherops[1] = adjust_address (operands[1], SImode, 4);
13101 /* Take care of overlapping base/data reg. */
13102 if (reg_mentioned_p (operands[0], operands[1]))
13104 output_asm_insn ("ldr%?\t%0, %1", otherops);
13105 output_asm_insn ("ldr%?\t%0, %1", operands);
13109 output_asm_insn ("ldr%?\t%0, %1", operands);
13110 output_asm_insn ("ldr%?\t%0, %1", otherops);
13117 /* Constraints should ensure this. */
13118 gcc_assert (code0 == MEM && code1 == REG);
13119 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13121 switch (GET_CODE (XEXP (operands[0], 0)))
13125 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13127 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13131 gcc_assert (TARGET_LDRD);
13132 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13137 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13139 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13144 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13146 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13150 gcc_assert (TARGET_LDRD);
13151 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13156 otherops[0] = operands[1];
13157 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13158 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13160 /* IWMMXT allows offsets larger than ldrd can handle,
13161 fix these up with a pair of str. */
13163 && GET_CODE (otherops[2]) == CONST_INT
13164 && (INTVAL(otherops[2]) <= -256
13165 || INTVAL(otherops[2]) >= 256))
13167 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13169 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13170 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13174 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13175 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13178 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13179 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13181 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13185 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13186 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13188 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13191 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13197 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13203 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13208 && (GET_CODE (otherops[2]) == REG
13210 || (GET_CODE (otherops[2]) == CONST_INT
13211 && INTVAL (otherops[2]) > -256
13212 && INTVAL (otherops[2]) < 256)))
13214 otherops[0] = operands[1];
13215 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13216 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13222 otherops[0] = adjust_address (operands[0], SImode, 4);
13223 otherops[1] = operands[1];
13224 output_asm_insn ("str%?\t%1, %0", operands);
13225 output_asm_insn ("str%?\t%H1, %0", otherops);
13232 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13233 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13236 output_move_quad (rtx *operands)
13238 if (REG_P (operands[0]))
13240 /* Load, or reg->reg move. */
13242 if (MEM_P (operands[1]))
13244 switch (GET_CODE (XEXP (operands[1], 0)))
13247 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13252 output_asm_insn ("adr%?\t%0, %1", operands);
13253 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13257 gcc_unreachable ();
13265 gcc_assert (REG_P (operands[1]));
13267 dest = REGNO (operands[0]);
13268 src = REGNO (operands[1]);
13270 /* This seems pretty dumb, but hopefully GCC won't try to do it very often. */
13273 for (i = 0; i < 4; i++)
13275 ops[0] = gen_rtx_REG (SImode, dest + i);
13276 ops[1] = gen_rtx_REG (SImode, src + i);
13277 output_asm_insn ("mov%?\t%0, %1", ops);
13280 for (i = 3; i >= 0; i--)
13282 ops[0] = gen_rtx_REG (SImode, dest + i);
13283 ops[1] = gen_rtx_REG (SImode, src + i);
13284 output_asm_insn ("mov%?\t%0, %1", ops);
13290 gcc_assert (MEM_P (operands[0]));
13291 gcc_assert (REG_P (operands[1]));
13292 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13294 switch (GET_CODE (XEXP (operands[0], 0)))
13297 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13301 gcc_unreachable ();
13308 /* Output a VFP load or store instruction. */
13311 output_move_vfp (rtx *operands)
13313 rtx reg, mem, addr, ops[2];
13314 int load = REG_P (operands[0]);
13315 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13316 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13319 enum machine_mode mode;
13321 reg = operands[!load];
13322 mem = operands[load];
13324 mode = GET_MODE (reg);
13326 gcc_assert (REG_P (reg));
13327 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13328 gcc_assert (mode == SFmode
13332 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13333 gcc_assert (MEM_P (mem));
13335 addr = XEXP (mem, 0);
13337 switch (GET_CODE (addr))
13340 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13341 ops[0] = XEXP (addr, 0);
13346 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13347 ops[0] = XEXP (addr, 0);
13352 templ = "f%s%c%%?\t%%%s0, %%1%s";
13358 sprintf (buff, templ,
13359 load ? "ld" : "st",
13362 integer_p ? "\t%@ int" : "");
13363 output_asm_insn (buff, ops);
13368 /* Output a Neon quad-word load or store, or a load or store for
13369 larger structure modes.
13371 WARNING: The ordering of elements is weird in big-endian mode,
13372 because we use VSTM, as required by the EABI. GCC RTL defines
13373 element ordering based on in-memory order. This can differ
13374 from the architectural ordering of elements within a NEON register.
13375 The intrinsics defined in arm_neon.h use the NEON register element
13376 ordering, not the GCC RTL element ordering.
13378 For example, the in-memory ordering of a big-endian quadword
13379 vector with 16-bit elements when stored from register pair {d0,d1}
13380 will be (lowest address first, d0[N] is NEON register element N):
13382 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13384 When necessary, quadword registers (dN, dN+1) are moved to ARM
13385 registers from rN in the order:
13387 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13389 So that STM/LDM can be used on vectors in ARM registers, and the
13390 same memory layout will result as if VSTM/VLDM were used. */
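/* Illustrative sketch (not part of the compiler): printing the
   big-endian layout quoted above for a quadword of 16-bit elements,
   using the same d0[N]/d1[N] numbering as the comment.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Quadword element found at each successive memory halfword when
     stored from {d0, d1} with VSTM in big-endian mode.  */
  static const int elt[8] = { 3, 2, 1, 0, 7, 6, 5, 4 };
  int i;

  for (i = 0; i < 8; i++)
    printf ("mem halfword %d = d%d[%d]\n", i, elt[i] / 4, elt[i]);
  return 0;
}
#endif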
13393 output_move_neon (rtx *operands)
13395 rtx reg, mem, addr, ops[2];
13396 int regno, load = REG_P (operands[0]);
13399 enum machine_mode mode;
13401 reg = operands[!load];
13402 mem = operands[load];
13404 mode = GET_MODE (reg);
13406 gcc_assert (REG_P (reg));
13407 regno = REGNO (reg);
13408 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13409 || NEON_REGNO_OK_FOR_QUAD (regno));
13410 gcc_assert (VALID_NEON_DREG_MODE (mode)
13411 || VALID_NEON_QREG_MODE (mode)
13412 || VALID_NEON_STRUCT_MODE (mode));
13413 gcc_assert (MEM_P (mem));
13415 addr = XEXP (mem, 0);
13417 /* Strip off const from addresses like (const (plus (...))). */
13418 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13419 addr = XEXP (addr, 0);
13421 switch (GET_CODE (addr))
13424 templ = "v%smia%%?\t%%0!, %%h1";
13425 ops[0] = XEXP (addr, 0);
13430 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13431 templ = "v%smdb%%?\t%%0!, %%h1";
13432 ops[0] = XEXP (addr, 0);
13437 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13438 gcc_unreachable ();
13443 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13446 for (i = 0; i < nregs; i++)
13448 /* We're only using DImode here because it's a convenient size. */
13449 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13450 ops[1] = adjust_address (mem, DImode, 8 * i);
13451 if (reg_overlap_mentioned_p (ops[0], mem))
13453 gcc_assert (overlap == -1);
13458 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13459 output_asm_insn (buff, ops);
13464 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13465 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13466 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13467 output_asm_insn (buff, ops);
13474 templ = "v%smia%%?\t%%m0, %%h1";
13479 sprintf (buff, templ, load ? "ld" : "st");
13480 output_asm_insn (buff, ops);
13485 /* Compute and return the length of neon_mov<mode>, where <mode> is
13486 one of VSTRUCT modes: EI, OI, CI or XI. */
13488 arm_attr_length_move_neon (rtx insn)
13490 rtx reg, mem, addr;
13492 enum machine_mode mode;
13494 extract_insn_cached (insn);
13496 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13498 mode = GET_MODE (recog_data.operand[0]);
13509 gcc_unreachable ();
13513 load = REG_P (recog_data.operand[0]);
13514 reg = recog_data.operand[!load];
13515 mem = recog_data.operand[load];
13517 gcc_assert (MEM_P (mem));
13519 mode = GET_MODE (reg);
13520 addr = XEXP (mem, 0);
13522 /* Strip off const from addresses like (const (plus (...))). */
13523 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13524 addr = XEXP (addr, 0);
13526 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13528 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
13539 arm_address_offset_is_imm (rtx insn)
13543 extract_insn_cached (insn);
13545 if (REG_P (recog_data.operand[0]))
13548 mem = recog_data.operand[0];
13550 gcc_assert (MEM_P (mem));
13552 addr = XEXP (mem, 0);
13554 if (GET_CODE (addr) == REG
13555 || (GET_CODE (addr) == PLUS
13556 && GET_CODE (XEXP (addr, 0)) == REG
13557 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13563 /* Output an ADD r, s, #n where n may be too big for one instruction.
13564 If adding zero to one register, output nothing. */
13566 output_add_immediate (rtx *operands)
13568 HOST_WIDE_INT n = INTVAL (operands[2]);
13570 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13573 output_multi_immediate (operands,
13574 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13577 output_multi_immediate (operands,
13578 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13585 /* Output a multiple immediate operation.
13586 OPERANDS is the vector of operands referred to in the output patterns.
13587 INSTR1 is the output pattern to use for the first constant.
13588 INSTR2 is the output pattern to use for subsequent constants.
13589 IMMED_OP is the index of the constant slot in OPERANDS.
13590 N is the constant value. */
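/* As an illustration (hypothetical registers): for n = 0x10f00, which
   is not encodable as a single ARM immediate, the loop below finds the
   8-bit chunks at even bit positions and emits one instruction each:

       add	r0, r1, #0xf00
       add	r0, r0, #0x10000  */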
13591 static const char *
13592 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13593 int immed_op, HOST_WIDE_INT n)
13595 #if HOST_BITS_PER_WIDE_INT > 32
13601 /* Quick and easy output. */
13602 operands[immed_op] = const0_rtx;
13603 output_asm_insn (instr1, operands);
13608 const char * instr = instr1;
13610 /* Note that n is never zero here (which would give no output). */
13611 for (i = 0; i < 32; i += 2)
13615 operands[immed_op] = GEN_INT (n & (255 << i));
13616 output_asm_insn (instr, operands);
13626 /* Return the name of a shifter operation. */
13627 static const char *
arm_shift_nmem (enum rtx_code code)
13633 return ARM_LSL_NAME;
13649 /* Return the appropriate ARM instruction for the operation code.
13650 The returned result should not be overwritten. OP is the rtx of the
operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the
operator was shifted.  */
13654 arithmetic_instr (rtx op, int shift_first_arg)
13656 switch (GET_CODE (op))
13662 return shift_first_arg ? "rsb" : "sub";
return arm_shift_nmem (GET_CODE (op));
13680 gcc_unreachable ();
13684 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13685 for the operation code. The returned result should not be overwritten.
13686 OP is the rtx code of the shift.
On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise
it is the constant shift amount.  */
13689 static const char *
13690 shift_op (rtx op, HOST_WIDE_INT *amountp)
13693 enum rtx_code code = GET_CODE (op);
13695 switch (GET_CODE (XEXP (op, 1)))
13703 *amountp = INTVAL (XEXP (op, 1));
13707 gcc_unreachable ();
13713 gcc_assert (*amountp != -1);
13714 *amountp = 32 - *amountp;
13717 /* Fall through. */
mnem = arm_shift_nmem (code);
13727 /* We never have to worry about the amount being other than a
13728 power of 2, since this case can never be reloaded from a reg. */
13729 gcc_assert (*amountp != -1);
13730 *amountp = int_log2 (*amountp);
13731 return ARM_LSL_NAME;
13734 gcc_unreachable ();
13737 if (*amountp != -1)
13739 /* This is not 100% correct, but follows from the desire to merge
13740 multiplication by a power of 2 with the recognizer for a
13741 shift. >=32 is not a valid shift for "lsl", so we must try and
13742 output a shift that produces the correct arithmetical result.
13743 Using lsr #32 is identical except for the fact that the carry bit
13744 is not set correctly if we set the flags; but we never use the
13745 carry bit from such an operation, so we can ignore that. */
13746 if (code == ROTATERT)
13747 /* Rotate is just modulo 32. */
13749 else if (*amountp != (*amountp & 31))
13751 if (code == ASHIFT)
13756 /* Shifts of 0 are no-ops. */
13764 /* Obtain the shift from the POWER of two. */
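/* E.g. int_log2 (8) returns 3, the position of the lowest (and, for a
   power of two, only) set bit.  */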
13766 static HOST_WIDE_INT
13767 int_log2 (HOST_WIDE_INT power)
13769 HOST_WIDE_INT shift = 0;
13771 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13773 gcc_assert (shift <= 31);
13780 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13781 because /bin/as is horribly restrictive. The judgement about
13782 whether or not each character is 'printable' (and can be output as
13783 is) or not (and must be printed with an octal escape) must be made
13784 with reference to the *host* character set -- the situation is
13785 similar to that discussed in the comments above pp_c_char in
13786 c-pretty-print.c. */
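/* For example (a sketch): the three input bytes 'a', '"' and '\n'
   would be emitted as

       .ascii "a\"\012"

   since the quote needs a backslash escape and the newline, being
   unprintable, is written as a three-digit octal escape.  */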
13788 #define MAX_ASCII_LEN 51
13791 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13794 int len_so_far = 0;
13796 fputs ("\t.ascii\t\"", stream);
13798 for (i = 0; i < len; i++)
13802 if (len_so_far >= MAX_ASCII_LEN)
13804 fputs ("\"\n\t.ascii\t\"", stream);
13810 if (c == '\\' || c == '\"')
13812 putc ('\\', stream);
13820 fprintf (stream, "\\%03o", c);
13825 fputs ("\"\n", stream);
13828 /* Compute the register save mask for registers 0 through 12
13829 inclusive. This code is used by arm_compute_save_reg_mask. */
13831 static unsigned long
13832 arm_compute_save_reg0_reg12_mask (void)
13834 unsigned long func_type = arm_current_func_type ();
13835 unsigned long save_reg_mask = 0;
13838 if (IS_INTERRUPT (func_type))
13840 unsigned int max_reg;
13841 /* Interrupt functions must not corrupt any registers,
13842 even call clobbered ones. If this is a leaf function
13843 we can just examine the registers used by the RTL, but
13844 otherwise we have to assume that whatever function is
13845 called might clobber anything, and so we have to save
13846 all the call-clobbered registers as well. */
13847 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13848 /* FIQ handlers have registers r8 - r12 banked, so
we only need to check r0 - r7.  Normal ISRs only
13850 bank r14 and r15, so we must check up to r12.
13851 r13 is the stack pointer which is always preserved,
13852 so we do not need to consider it here. */
13857 for (reg = 0; reg <= max_reg; reg++)
13858 if (df_regs_ever_live_p (reg)
13859 || (! current_function_is_leaf && call_used_regs[reg]))
13860 save_reg_mask |= (1 << reg);
13862 /* Also save the pic base register if necessary. */
13864 && !TARGET_SINGLE_PIC_BASE
13865 && arm_pic_register != INVALID_REGNUM
13866 && crtl->uses_pic_offset_table)
13867 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13869 else if (IS_VOLATILE(func_type))
13871 /* For noreturn functions we historically omitted register saves
13872 altogether. However this really messes up debugging. As a
13873 compromise save just the frame pointers. Combined with the link
register saved elsewhere this should be sufficient to get a
backtrace.  */
13876 if (frame_pointer_needed)
13877 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13878 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13879 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13880 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13881 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13885 /* In the normal case we only need to save those registers
13886 which are call saved and which are used by this function. */
13887 for (reg = 0; reg <= 11; reg++)
13888 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13889 save_reg_mask |= (1 << reg);
13891 /* Handle the frame pointer as a special case. */
13892 if (frame_pointer_needed)
13893 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13895 /* If we aren't loading the PIC register,
13896 don't stack it even though it may be live. */
13898 && !TARGET_SINGLE_PIC_BASE
13899 && arm_pic_register != INVALID_REGNUM
13900 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13901 || crtl->uses_pic_offset_table))
13902 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13904 /* The prologue will copy SP into R0, so save it. */
13905 if (IS_STACKALIGN (func_type))
13906 save_reg_mask |= 1;
13909 /* Save registers so the exception handler can modify them. */
13910 if (crtl->calls_eh_return)
13916 reg = EH_RETURN_DATA_REGNO (i);
13917 if (reg == INVALID_REGNUM)
13919 save_reg_mask |= 1 << reg;
13923 return save_reg_mask;
13927 /* Compute the number of bytes used to store the static chain register on the
13928 stack, above the stack frame. We need to know this accurately to get the
13929 alignment of the rest of the stack frame correct. */
static int
arm_compute_static_chain_stack_bytes (void)
13933 unsigned long func_type = arm_current_func_type ();
13934 int static_chain_stack_bytes = 0;
if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
    && IS_NESTED (func_type)
    && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13939 static_chain_stack_bytes = 4;
13941 return static_chain_stack_bytes;
13945 /* Compute a bit mask of which registers need to be
13946 saved on the stack for the current function.
13947 This is used by arm_get_frame_offsets, which may add extra registers. */
13949 static unsigned long
13950 arm_compute_save_reg_mask (void)
13952 unsigned int save_reg_mask = 0;
13953 unsigned long func_type = arm_current_func_type ();
13956 if (IS_NAKED (func_type))
13957 /* This should never really happen. */
13960 /* If we are creating a stack frame, then we must save the frame pointer,
13961 IP (which will hold the old stack pointer), LR and the PC. */
13962 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13964 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13967 | (1 << PC_REGNUM);
13969 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13971 /* Decide if we need to save the link register.
13972 Interrupt routines have their own banked link register,
13973 so they never need to save it.
13974 Otherwise if we do not use the link register we do not need to save
13975 it. If we are pushing other registers onto the stack however, we
13976 can save an instruction in the epilogue by pushing the link register
13977 now and then popping it back into the PC. This incurs extra memory
13978 accesses though, so we only do it when optimizing for size, and only
13979 if we know that we will not need a fancy return sequence. */
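/* Illustrative (hypothetical function): at -Os a function that must
   save r4 and LR can emit

       stmfd	sp!, {r4, lr}	@ prologue
       ldmfd	sp!, {r4, pc}	@ epilogue

   folding the return into the pop instead of a separate "bx lr".  */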
13980 if (df_regs_ever_live_p (LR_REGNUM)
13983 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13984 && !crtl->calls_eh_return))
13985 save_reg_mask |= 1 << LR_REGNUM;
13987 if (cfun->machine->lr_save_eliminated)
13988 save_reg_mask &= ~ (1 << LR_REGNUM);
13990 if (TARGET_REALLY_IWMMXT
13991 && ((bit_count (save_reg_mask)
13992 + ARM_NUM_INTS (crtl->args.pretend_args_size +
arm_compute_static_chain_stack_bytes ())
13996 /* The total number of registers that are going to be pushed
13997 onto the stack is odd. We need to ensure that the stack
13998 is 64-bit aligned before we start to save iWMMXt registers,
13999 and also before we start to create locals. (A local variable
14000 might be a double or long long which we will load/store using
14001 an iWMMXt instruction). Therefore we need to push another
14002 ARM register, so that the stack will be 64-bit aligned. We
try to avoid using the arg registers (r0 - r3) as they might be
14004 used to pass values in a tail call. */
14005 for (reg = 4; reg <= 12; reg++)
14006 if ((save_reg_mask & (1 << reg)) == 0)
14010 save_reg_mask |= (1 << reg);
14013 cfun->machine->sibcall_blocked = 1;
14014 save_reg_mask |= (1 << 3);
/* We may need to push an additional register for use when
   initializing the PIC base register.  */
14020 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14021 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14023 reg = thumb_find_work_register (1 << 4);
14024 if (!call_used_regs[reg])
14025 save_reg_mask |= (1 << reg);
14028 return save_reg_mask;
14032 /* Compute a bit mask of which registers need to be
14033 saved on the stack for the current function. */
14034 static unsigned long
14035 thumb1_compute_save_reg_mask (void)
14037 unsigned long mask;
for (reg = 0; reg < 12; reg++)
14042 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14046 && !TARGET_SINGLE_PIC_BASE
14047 && arm_pic_register != INVALID_REGNUM
14048 && crtl->uses_pic_offset_table)
14049 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14051 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14052 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14053 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14055 /* LR will also be pushed if any lo regs are pushed. */
14056 if (mask & 0xff || thumb_force_lr_save ())
14057 mask |= (1 << LR_REGNUM);
14059 /* Make sure we have a low work register if we need one.
14060 We will need one if we are going to push a high register,
14061 but we are not currently intending to push a low register. */
14062 if ((mask & 0xff) == 0
14063 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14065 /* Use thumb_find_work_register to choose which register
14066 we will use. If the register is live then we will
14067 have to push it. Use LAST_LO_REGNUM as our fallback
14068 choice for the register to select. */
14069 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14070 /* Make sure the register returned by thumb_find_work_register is
14071 not part of the return value. */
14072 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14073 reg = LAST_LO_REGNUM;
14075 if (! call_used_regs[reg])
14079 /* The 504 below is 8 bytes less than 512 because there are two possible
14080 alignment words. We can't tell here if they will be present or not so we
14081 have to play it safe and assume that they are. */
if ((CALLER_INTERWORKING_SLOT_SIZE
     + ROUND_UP_WORD (get_frame_size ())
     + crtl->outgoing_args_size) >= 504)
14086 /* This is the same as the code in thumb1_expand_prologue() which
14087 determines which register to use for stack decrement. */
14088 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14089 if (mask & (1 << reg))
14092 if (reg > LAST_LO_REGNUM)
14094 /* Make sure we have a register available for stack decrement. */
14095 mask |= 1 << LAST_LO_REGNUM;
14103 /* Return the number of bytes required to save VFP registers. */
14105 arm_get_vfp_saved_size (void)
14107 unsigned int regno;
14112 /* Space for saved VFP registers. */
14113 if (TARGET_HARD_FLOAT && TARGET_VFP)
14116 for (regno = FIRST_VFP_REGNUM;
14117 regno < LAST_VFP_REGNUM;
14120 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14121 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
/* Work around the ARM10 VFPr1 bug.  */
14126 if (count == 2 && !arm_arch6)
14128 saved += count * 8;
14137 if (count == 2 && !arm_arch6)
14139 saved += count * 8;
14146 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14147 everything bar the final return instruction. */
14149 output_return_instruction (rtx operand, int really_return, int reverse)
14151 char conditional[10];
14154 unsigned long live_regs_mask;
14155 unsigned long func_type;
14156 arm_stack_offsets *offsets;
14158 func_type = arm_current_func_type ();
14160 if (IS_NAKED (func_type))
14163 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14165 /* If this function was declared non-returning, and we have
14166 found a tail call, then we have to trust that the called
14167 function won't return. */
14172 /* Otherwise, trap an attempted return by aborting. */
14174 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14176 assemble_external_libcall (ops[1]);
14177 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14183 gcc_assert (!cfun->calls_alloca || really_return);
14185 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14187 cfun->machine->return_used_this_function = 1;
14189 offsets = arm_get_frame_offsets ();
14190 live_regs_mask = offsets->saved_regs_mask;
14192 if (live_regs_mask)
14194 const char * return_reg;
14196 /* If we do not have any special requirements for function exit
14197 (e.g. interworking) then we can load the return address
14198 directly into the PC. Otherwise we must load it into LR. */
14200 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14201 return_reg = reg_names[PC_REGNUM];
14203 return_reg = reg_names[LR_REGNUM];
14205 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14207 /* There are three possible reasons for the IP register
14208 being saved. 1) a stack frame was created, in which case
14209 IP contains the old stack pointer, or 2) an ISR routine
14210 corrupted it, or 3) it was saved to align the stack on
iWMMXt.  In case 1, restore IP into SP, otherwise just restore IP.  */
14213 if (frame_pointer_needed)
14215 live_regs_mask &= ~ (1 << IP_REGNUM);
14216 live_regs_mask |= (1 << SP_REGNUM);
14219 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14222 /* On some ARM architectures it is faster to use LDR rather than
14223 LDM to load a single register. On other architectures, the
14224 cost is the same. In 26 bit mode, or for exception handlers,
we have to use LDM to load the PC so that the CPSR is also
restored.  */
14227 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14228 if (live_regs_mask == (1U << reg))
14231 if (reg <= LAST_ARM_REGNUM
14232 && (reg != LR_REGNUM
14234 || ! IS_INTERRUPT (func_type)))
14236 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14237 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14244 /* Generate the load multiple instruction to restore the
14245 registers. Note we can get here, even if
14246 frame_pointer_needed is true, but only if sp already
14247 points to the base of the saved core registers. */
14248 if (live_regs_mask & (1 << SP_REGNUM))
14250 unsigned HOST_WIDE_INT stack_adjust;
14252 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14253 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14255 if (stack_adjust && arm_arch5 && TARGET_ARM)
14256 if (TARGET_UNIFIED_ASM)
14257 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14259 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14262 /* If we can't use ldmib (SA110 bug),
14263 then try to pop r3 instead. */
14265 live_regs_mask |= 1 << 3;
14267 if (TARGET_UNIFIED_ASM)
14268 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14270 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14274 if (TARGET_UNIFIED_ASM)
14275 sprintf (instr, "pop%s\t{", conditional);
14277 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14279 p = instr + strlen (instr);
14281 for (reg = 0; reg <= SP_REGNUM; reg++)
14282 if (live_regs_mask & (1 << reg))
14284 int l = strlen (reg_names[reg]);
14290 memcpy (p, ", ", 2);
14294 memcpy (p, "%|", 2);
14295 memcpy (p + 2, reg_names[reg], l);
14299 if (live_regs_mask & (1 << LR_REGNUM))
14301 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14302 /* If returning from an interrupt, restore the CPSR. */
14303 if (IS_INTERRUPT (func_type))
14310 output_asm_insn (instr, & operand);
14312 /* See if we need to generate an extra instruction to
14313 perform the actual function return. */
14315 && func_type != ARM_FT_INTERWORKED
14316 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14318 /* The return has already been handled
14319 by loading the LR into the PC. */
14326 switch ((int) ARM_FUNC_TYPE (func_type))
14330 /* ??? This is wrong for unified assembly syntax. */
14331 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14334 case ARM_FT_INTERWORKED:
14335 sprintf (instr, "bx%s\t%%|lr", conditional);
14338 case ARM_FT_EXCEPTION:
14339 /* ??? This is wrong for unified assembly syntax. */
14340 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14344 /* Use bx if it's available. */
14345 if (arm_arch5 || arm_arch4t)
14346 sprintf (instr, "bx%s\t%%|lr", conditional);
14348 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14352 output_asm_insn (instr, & operand);
14358 /* Write the function name into the code section, directly preceding
14359 the function prologue.
Code will be output similar to this:

     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov	 ip, sp
	 stmfd	 sp!, {fp, ip, lr, pc}
	 sub	 fp, ip, #4
14372 When performing a stack backtrace, code can inspect the value
14373 of 'pc' stored at 'fp' + 0. If the trace function then looks
14374 at location pc - 12 and the top 8 bits are set, then we know
14375 that there is a function name embedded immediately preceding this
location, and that it has length ((pc[-3]) & ~0xff000000).
14378 We assume that pc is declared as a pointer to an unsigned long.
14380 It is of no benefit to output the function name if we are assembling
14381 a leaf function. These function types will not contain a stack
backtrace structure, therefore it is not possible to determine the
function name.  */
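/* A worked example (illustrative values): if the word at pc - 12 reads
   0xff000018, the 0xff marker byte is present and the low 24 bits give
   0x18 == 24 bytes of padded name text immediately preceding that
   word.  */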
14385 arm_poke_function_name (FILE *stream, const char *name)
14387 unsigned long alignlength;
14388 unsigned long length;
14391 length = strlen (name) + 1;
14392 alignlength = ROUND_UP_WORD (length);
14394 ASM_OUTPUT_ASCII (stream, name, length);
14395 ASM_OUTPUT_ALIGN (stream, 2);
14396 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14397 assemble_aligned_integer (UNITS_PER_WORD, x);
14400 /* Place some comments into the assembler stream
14401 describing the current function. */
14403 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14405 unsigned long func_type;
14409 thumb1_output_function_prologue (f, frame_size);
14413 /* Sanity check. */
14414 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14416 func_type = arm_current_func_type ();
14418 switch ((int) ARM_FUNC_TYPE (func_type))
14421 case ARM_FT_NORMAL:
14423 case ARM_FT_INTERWORKED:
14424 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14427 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14430 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14432 case ARM_FT_EXCEPTION:
14433 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14437 if (IS_NAKED (func_type))
14438 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14440 if (IS_VOLATILE (func_type))
14441 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14443 if (IS_NESTED (func_type))
14444 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14445 if (IS_STACKALIGN (func_type))
14446 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14448 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14450 crtl->args.pretend_args_size, frame_size);
14452 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14453 frame_pointer_needed,
14454 cfun->machine->uses_anonymous_args);
14456 if (cfun->machine->lr_save_eliminated)
14457 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14459 if (crtl->calls_eh_return)
14460 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14465 arm_output_epilogue (rtx sibling)
14468 unsigned long saved_regs_mask;
14469 unsigned long func_type;
14470 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14471 frame that is $fp + 4 for a non-variadic function. */
14472 int floats_offset = 0;
14474 FILE * f = asm_out_file;
14475 unsigned int lrm_count = 0;
14476 int really_return = (sibling == NULL);
14478 arm_stack_offsets *offsets;
14480 /* If we have already generated the return instruction
14481 then it is futile to generate anything else. */
if (use_return_insn (FALSE, sibling)
    && (cfun->machine->return_used_this_function != 0))
14486 func_type = arm_current_func_type ();
14488 if (IS_NAKED (func_type))
14489 /* Naked functions don't have epilogues. */
14492 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14496 /* A volatile function should never return. Call abort. */
14497 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14498 assemble_external_libcall (op);
14499 output_asm_insn ("bl\t%a0", &op);
14504 /* If we are throwing an exception, then we really must be doing a
14505 return, so we can't tail-call. */
14506 gcc_assert (!crtl->calls_eh_return || really_return);
14508 offsets = arm_get_frame_offsets ();
14509 saved_regs_mask = offsets->saved_regs_mask;
14512 lrm_count = bit_count (saved_regs_mask);
14514 floats_offset = offsets->saved_args;
14515 /* Compute how far away the floats will be. */
14516 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14517 if (saved_regs_mask & (1 << reg))
14518 floats_offset += 4;
14520 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14522 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14523 int vfp_offset = offsets->frame;
14525 if (TARGET_FPA_EMU2)
14527 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14528 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14530 floats_offset += 12;
14531 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14532 reg, FP_REGNUM, floats_offset - vfp_offset);
14537 start_reg = LAST_FPA_REGNUM;
14539 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14541 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14543 floats_offset += 12;
14545 /* We can't unstack more than four registers at once. */
14546 if (start_reg - reg == 3)
14548 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14549 reg, FP_REGNUM, floats_offset - vfp_offset);
14550 start_reg = reg - 1;
14555 if (reg != start_reg)
14556 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14557 reg + 1, start_reg - reg,
14558 FP_REGNUM, floats_offset - vfp_offset);
14559 start_reg = reg - 1;
14563 /* Just in case the last register checked also needs unstacking. */
14564 if (reg != start_reg)
14565 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14566 reg + 1, start_reg - reg,
14567 FP_REGNUM, floats_offset - vfp_offset);
14570 if (TARGET_HARD_FLOAT && TARGET_VFP)
14574 /* The fldmd insns do not have base+offset addressing
14575 modes, so we use IP to hold the address. */
14576 saved_size = arm_get_vfp_saved_size ();
14578 if (saved_size > 0)
14580 floats_offset += saved_size;
14581 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14582 FP_REGNUM, floats_offset - vfp_offset);
14584 start_reg = FIRST_VFP_REGNUM;
14585 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14587 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14588 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14590 if (start_reg != reg)
14591 vfp_output_fldmd (f, IP_REGNUM,
14592 (start_reg - FIRST_VFP_REGNUM) / 2,
14593 (reg - start_reg) / 2);
14594 start_reg = reg + 2;
14597 if (start_reg != reg)
14598 vfp_output_fldmd (f, IP_REGNUM,
14599 (start_reg - FIRST_VFP_REGNUM) / 2,
14600 (reg - start_reg) / 2);
14605 /* The frame pointer is guaranteed to be non-double-word aligned.
14606 This is because it is set to (old_stack_pointer - 4) and the
14607 old_stack_pointer was double word aligned. Thus the offset to
14608 the iWMMXt registers to be loaded must also be non-double-word
14609 sized, so that the resultant address *is* double-word aligned.
14610 We can ignore floats_offset since that was already included in
14611 the live_regs_mask. */
14612 lrm_count += (lrm_count % 2 ? 2 : 1);
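/* (The adjustment above always leaves lrm_count odd, so every
   lrm_count * 4 offset used below is 4 mod 8, matching the
   non-doubleword-aligned frame pointer.)  */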
14614 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14615 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14617 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14618 reg, FP_REGNUM, lrm_count * 4);
14623 /* saved_regs_mask should contain the IP, which at the time of stack
14624 frame generation actually contains the old stack pointer. So a
14625 quick way to unwind the stack is just pop the IP register directly
14626 into the stack pointer. */
14627 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14628 saved_regs_mask &= ~ (1 << IP_REGNUM);
14629 saved_regs_mask |= (1 << SP_REGNUM);
14631 /* There are two registers left in saved_regs_mask - LR and PC. We
14632 only need to restore the LR register (the return address), but to
14633 save time we can load it directly into the PC, unless we need a
14634 special function exit sequence, or we are not really returning. */
14636 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14637 && !crtl->calls_eh_return)
14638 /* Delete the LR from the register mask, so that the LR on
14639 the stack is loaded into the PC in the register mask. */
14640 saved_regs_mask &= ~ (1 << LR_REGNUM);
14642 saved_regs_mask &= ~ (1 << PC_REGNUM);
14644 /* We must use SP as the base register, because SP is one of the
14645 registers being restored. If an interrupt or page fault
14646 happens in the ldm instruction, the SP might or might not
14647 have been restored. That would be bad, as then SP will no
14648 longer indicate the safe area of stack, and we can get stack
14649 corruption. Using SP as the base register means that it will
14650 be reset correctly to the original value, should an interrupt
14651 occur. If the stack pointer already points at the right
14652 place, then omit the subtraction. */
14653 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14654 || cfun->calls_alloca)
14655 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14656 4 * bit_count (saved_regs_mask));
14657 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14659 if (IS_INTERRUPT (func_type))
14660 /* Interrupt handlers will have pushed the
14661 IP onto the stack, so restore it now. */
14662 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14666 /* This branch is executed for ARM mode (non-apcs frames) and
14667 Thumb-2 mode. Frame layout is essentially the same for those
cases, except that in ARM mode the frame pointer points to the
first saved register, while in Thumb-2 mode the frame pointer points
to the last saved register.

It is possible to make the frame pointer point to the last saved
register in both cases, and remove some of the conditionals below.
14674 That means that fp setup in prologue would be just "mov fp, sp"
14675 and sp restore in epilogue would be just "mov sp, fp", whereas
14676 now we have to use add/sub in those cases. However, the value
14677 of that would be marginal, as both mov and add/sub are 32-bit
14678 in ARM mode, and it would require extra conditionals
in arm_expand_prologue to distinguish the ARM-apcs-frame case
(where the frame pointer is required to point at the first register)
from the ARM-non-apcs-frame case.  Therefore, such a change is
postponed until a real need arises.  */
14683 unsigned HOST_WIDE_INT amount;
14685 /* Restore stack pointer if necessary. */
14686 if (TARGET_ARM && frame_pointer_needed)
14688 operands[0] = stack_pointer_rtx;
14689 operands[1] = hard_frame_pointer_rtx;
14691 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14692 output_add_immediate (operands);
14696 if (frame_pointer_needed)
14698 /* For Thumb-2 restore sp from the frame pointer.
Operand restrictions mean we have to increment FP, then copy
it to SP.  */
14701 amount = offsets->locals_base - offsets->saved_regs;
14702 operands[0] = hard_frame_pointer_rtx;
14706 unsigned long count;
14707 operands[0] = stack_pointer_rtx;
14708 amount = offsets->outgoing_args - offsets->saved_regs;
/* Pop call-clobbered registers if doing so avoids a
   separate stack adjustment.  */
14711 count = offsets->saved_regs - offsets->saved_args;
14714 && !crtl->calls_eh_return
&& bit_count (saved_regs_mask) * 4 == count
14716 && !IS_INTERRUPT (func_type)
14717 && !crtl->tail_call_emit)
14719 unsigned long mask;
14720 /* Preserve return values, of any size. */
mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
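/* e.g. an 8-byte return value occupies two core registers, giving
   mask = (1 << 2) - 1 = 0x3 and so protecting r0 and r1 from being
   popped over.  */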
14723 mask &= ~saved_regs_mask;
14725 while (bit_count (mask) * 4 > amount)
14727 while ((mask & (1 << reg)) == 0)
14729 mask &= ~(1 << reg);
if (bit_count (mask) * 4 == amount)
  {
14733 saved_regs_mask |= mask;
14740 operands[1] = operands[0];
14741 operands[2] = GEN_INT (amount);
14742 output_add_immediate (operands);
14744 if (frame_pointer_needed)
14745 asm_fprintf (f, "\tmov\t%r, %r\n",
14746 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14749 if (TARGET_FPA_EMU2)
14751 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14752 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14753 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14758 start_reg = FIRST_FPA_REGNUM;
14760 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14762 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14764 if (reg - start_reg == 3)
14766 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14767 start_reg, SP_REGNUM);
14768 start_reg = reg + 1;
14773 if (reg != start_reg)
14774 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14775 start_reg, reg - start_reg,
14778 start_reg = reg + 1;
14782 /* Just in case the last register checked also needs unstacking. */
14783 if (reg != start_reg)
14784 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14785 start_reg, reg - start_reg, SP_REGNUM);
14788 if (TARGET_HARD_FLOAT && TARGET_VFP)
14790 int end_reg = LAST_VFP_REGNUM + 1;
14792 /* Scan the registers in reverse order. We need to match
any groupings made in the prologue and generate matching
fldmd sequences.  */
14795 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14797 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14798 && (!df_regs_ever_live_p (reg + 1)
14799 || call_used_regs[reg + 1]))
14801 if (end_reg > reg + 2)
14802 vfp_output_fldmd (f, SP_REGNUM,
14803 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14804 (end_reg - (reg + 2)) / 2);
14808 if (end_reg > reg + 2)
14809 vfp_output_fldmd (f, SP_REGNUM, 0,
14810 (end_reg - (reg + 2)) / 2);
14814 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14815 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14816 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14818 /* If we can, restore the LR into the PC. */
14819 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14820 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14821 && !IS_STACKALIGN (func_type)
14823 && crtl->args.pretend_args_size == 0
14824 && saved_regs_mask & (1 << LR_REGNUM)
14825 && !crtl->calls_eh_return)
14827 saved_regs_mask &= ~ (1 << LR_REGNUM);
14828 saved_regs_mask |= (1 << PC_REGNUM);
14829 rfe = IS_INTERRUPT (func_type);
14834 /* Load the registers off the stack. If we only have one register
to load, use the LDR instruction - it is faster.  For Thumb-2,
always use pop and the assembler will pick the best instruction.  */
14837 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
&& !IS_INTERRUPT (func_type))
14840 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14842 else if (saved_regs_mask)
14844 if (saved_regs_mask & (1 << SP_REGNUM))
14845 /* Note - write back to the stack register is not enabled
14846 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14847 in the list of registers and if we add writeback the
14848 instruction becomes UNPREDICTABLE. */
14849 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14851 else if (TARGET_ARM)
14852 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14855 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14858 if (crtl->args.pretend_args_size)
14860 /* Unwind the pre-pushed regs. */
14861 operands[0] = operands[1] = stack_pointer_rtx;
14862 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14863 output_add_immediate (operands);
14867 /* We may have already restored PC directly from the stack. */
14868 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14871 /* Stack adjustment for exception handler. */
14872 if (crtl->calls_eh_return)
14873 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14874 ARM_EH_STACKADJ_REGNUM);
14876 /* Generate the return instruction. */
14877 switch ((int) ARM_FUNC_TYPE (func_type))
14881 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14884 case ARM_FT_EXCEPTION:
14885 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14888 case ARM_FT_INTERWORKED:
14889 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14893 if (IS_STACKALIGN (func_type))
14895 /* See comment in arm_expand_prologue. */
14896 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14898 if (arm_arch5 || arm_arch4t)
14899 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14901 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14909 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14910 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14912 arm_stack_offsets *offsets;
14918 /* Emit any call-via-reg trampolines that are needed for v4t support
14919 of call_reg and call_value_reg type insns. */
14920 for (regno = 0; regno < LR_REGNUM; regno++)
14922 rtx label = cfun->machine->call_via[regno];
14926 switch_to_section (function_section (current_function_decl));
14927 targetm.asm_out.internal_label (asm_out_file, "L",
14928 CODE_LABEL_NUMBER (label));
14929 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14933 /* ??? Probably not safe to set this here, since it assumes that a
14934 function will be emitted as assembly immediately after we generate
14935 RTL for it. This does not happen for inline functions. */
14936 cfun->machine->return_used_this_function = 0;
14938 else /* TARGET_32BIT */
14940 /* We need to take into account any stack-frame rounding. */
14941 offsets = arm_get_frame_offsets ();
14943 gcc_assert (!use_return_insn (FALSE, NULL)
14944 || (cfun->machine->return_used_this_function != 0)
14945 || offsets->saved_regs == offsets->outgoing_args
14946 || frame_pointer_needed);
14948 /* Reset the ARM-specific per-function variables. */
14949 after_arm_reorg = 0;
14953 /* Generate and emit an insn that we will recognize as a push_multi.
14954 Unfortunately, since this insn does not reflect very well the actual
14955 semantics of the operation, we need to annotate the insn for the benefit
14956 of DWARF2 frame unwind information. */
14958 emit_multi_reg_push (unsigned long mask)
14961 int num_dwarf_regs;
14965 int dwarf_par_index;
14968 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14969 if (mask & (1 << i))
14972 gcc_assert (num_regs && num_regs <= 16);
14974 /* We don't record the PC in the dwarf frame information. */
14975 num_dwarf_regs = num_regs;
14976 if (mask & (1 << PC_REGNUM))
14979 /* For the body of the insn we are going to generate an UNSPEC in
14980 parallel with several USEs. This allows the insn to be recognized
14981 by the push_multi pattern in the arm.md file.
14983 The body of the insn looks something like this:
14986 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14987 (const_int:SI <num>)))
14988 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14994 For the frame note however, we try to be more explicit and actually
14995 show each register being stored into the stack frame, plus a (single)
14996 decrement of the stack pointer. We do it this way in order to be
14997 friendly to the stack unwinding code, which only wants to see a single
14998 stack decrement per instruction. The RTL we generate for the note looks
14999 something like this:
15002 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15003 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15004 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15005 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
FIXME: In an ideal world the PRE_MODIFY would not exist and
15010 instead we'd have a parallel expression detailing all
15011 the stores to the various memory addresses so that debug
information is more up-to-date.  Remember, however, when writing
this, to take care of the constraints imposed by the push instruction.
15015 Note also that this has to be taken care of for the VFP registers.
15017 For more see PR43399. */
15019 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15020 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15021 dwarf_par_index = 1;
15023 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15025 if (mask & (1 << i))
15027 reg = gen_rtx_REG (SImode, i);
15029 XVECEXP (par, 0, 0)
15030 = gen_rtx_SET (VOIDmode,
15033 gen_rtx_PRE_MODIFY (Pmode,
15036 (stack_pointer_rtx,
15039 gen_rtx_UNSPEC (BLKmode,
15040 gen_rtvec (1, reg),
15041 UNSPEC_PUSH_MULT));
15043 if (i != PC_REGNUM)
15045 tmp = gen_rtx_SET (VOIDmode,
15046 gen_frame_mem (SImode, stack_pointer_rtx),
15048 RTX_FRAME_RELATED_P (tmp) = 1;
15049 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15057 for (j = 1, i++; j < num_regs; i++)
15059 if (mask & (1 << i))
15061 reg = gen_rtx_REG (SImode, i);
15063 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15065 if (i != PC_REGNUM)
15068 = gen_rtx_SET (VOIDmode,
15071 plus_constant (stack_pointer_rtx,
15074 RTX_FRAME_RELATED_P (tmp) = 1;
15075 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15082 par = emit_insn (par);
15084 tmp = gen_rtx_SET (VOIDmode,
15086 plus_constant (stack_pointer_rtx, -4 * num_regs));
15087 RTX_FRAME_RELATED_P (tmp) = 1;
15088 XVECEXP (dwarf, 0, 0) = tmp;
15090 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15095 /* Calculate the size of the return value that is passed in registers. */
15097 arm_size_return_regs (void)
15099 enum machine_mode mode;
15101 if (crtl->return_rtx != 0)
15102 mode = GET_MODE (crtl->return_rtx);
15104 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15106 return GET_MODE_SIZE (mode);
15110 emit_sfm (int base_reg, int count)
15117 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15118 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15120 reg = gen_rtx_REG (XFmode, base_reg++);
15122 XVECEXP (par, 0, 0)
15123 = gen_rtx_SET (VOIDmode,
15126 gen_rtx_PRE_MODIFY (Pmode,
15129 (stack_pointer_rtx,
15132 gen_rtx_UNSPEC (BLKmode,
15133 gen_rtvec (1, reg),
15134 UNSPEC_PUSH_MULT));
15135 tmp = gen_rtx_SET (VOIDmode,
15136 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15137 RTX_FRAME_RELATED_P (tmp) = 1;
15138 XVECEXP (dwarf, 0, 1) = tmp;
15140 for (i = 1; i < count; i++)
15142 reg = gen_rtx_REG (XFmode, base_reg++);
15143 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15145 tmp = gen_rtx_SET (VOIDmode,
15146 gen_frame_mem (XFmode,
15147 plus_constant (stack_pointer_rtx,
15150 RTX_FRAME_RELATED_P (tmp) = 1;
15151 XVECEXP (dwarf, 0, i + 1) = tmp;
15154 tmp = gen_rtx_SET (VOIDmode,
15156 plus_constant (stack_pointer_rtx, -12 * count));
15158 RTX_FRAME_RELATED_P (tmp) = 1;
15159 XVECEXP (dwarf, 0, 0) = tmp;
15161 par = emit_insn (par);
15162 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15168 /* Return true if the current function needs to save/restore LR. */
15171 thumb_force_lr_save (void)
15173 return !cfun->machine->lr_save_eliminated
15174 && (!leaf_function_p ()
15175 || thumb_far_jump_used_p ()
15176 || df_regs_ever_live_p (LR_REGNUM));
15180 /* Return true if r3 is used by any of the tail call insns in the
15181 current function. */
15184 any_sibcall_uses_r3 (void)
15189 if (!crtl->tail_call_emit)
15191 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15192 if (e->flags & EDGE_SIBCALL)
15194 rtx call = BB_END (e->src);
15195 if (!CALL_P (call))
15196 call = prev_nonnote_nondebug_insn (call);
15197 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15198 if (find_regno_fusage (call, USE, 3))
15205 /* Compute the distance from register FROM to register TO.
15206 These can be the arg pointer (26), the soft frame pointer (25),
15207 the stack pointer (13) or the hard frame pointer (11).
15208 In thumb mode r7 is used as the soft frame pointer, if needed.
15209 Typical stack layout looks like this:
       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --
15239 For a given function some or all of these stack components
15240 may not be needed, giving rise to the possibility of
15241 eliminating some of the registers.
15243 The values returned by this function must reflect the behavior
15244 of arm_expand_prologue() and arm_compute_save_reg_mask().
15246 The sign of the number returned reflects the direction of stack
15247 growth, so the values are positive for all eliminations except
15248 from the soft frame pointer to the hard frame pointer.
SFP may point just inside the local variables block to ensure correct
alignment.  */
15254 /* Calculate stack offsets. These are used to calculate register elimination
15255 offsets and in prologue/epilogue code. Also calculates which registers
15256 should be saved. */
15258 static arm_stack_offsets *
15259 arm_get_frame_offsets (void)
15261 struct arm_stack_offsets *offsets;
15262 unsigned long func_type;
15266 HOST_WIDE_INT frame_size;
15269 offsets = &cfun->machine->stack_offsets;
15271 /* We need to know if we are a leaf function. Unfortunately, it
15272 is possible to be called after start_sequence has been called,
15273 which causes get_insns to return the insns for the sequence,
15274 not the function, which will cause leaf_function_p to return
15275 the incorrect result.
However, we only need to know about leaf functions once reload has
completed, and the
15278 frame size cannot be changed after that time, so we can safely
15279 use the cached value. */
15281 if (reload_completed)
/* Initially this is the size of the local variables.  It will be translated
15285 into an offset once we have determined the size of preceding data. */
15286 frame_size = ROUND_UP_WORD (get_frame_size ());
15288 leaf = leaf_function_p ();
15290 /* Space for variadic functions. */
15291 offsets->saved_args = crtl->args.pretend_args_size;
15293 /* In Thumb mode this is incorrect, but never used. */
offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0)
		 + arm_compute_static_chain_stack_bytes ();
15299 unsigned int regno;
15301 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15302 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15303 saved = core_saved;
15305 /* We know that SP will be doubleword aligned on entry, and we must
15306 preserve that condition at any subroutine call. We also require the
15307 soft frame pointer to be doubleword aligned. */
15309 if (TARGET_REALLY_IWMMXT)
15311 /* Check for the call-saved iWMMXt registers. */
15312 for (regno = FIRST_IWMMXT_REGNUM;
15313 regno <= LAST_IWMMXT_REGNUM;
15315 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15319 func_type = arm_current_func_type ();
15320 if (! IS_VOLATILE (func_type))
15322 /* Space for saved FPA registers. */
15323 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15324 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15327 /* Space for saved VFP registers. */
15328 if (TARGET_HARD_FLOAT && TARGET_VFP)
15329 saved += arm_get_vfp_saved_size ();
15332 else /* TARGET_THUMB1 */
15334 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15335 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15336 saved = core_saved;
15337 if (TARGET_BACKTRACE)
15341 /* Saved registers include the stack frame. */
offsets->saved_regs = offsets->saved_args + saved
		      + arm_compute_static_chain_stack_bytes ();
15344 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
/* A leaf function does not need any stack alignment if it has nothing
   on the stack.  */
15347 if (leaf && frame_size == 0
/* However if it calls alloca (), we have a dynamically allocated
   block of BIGGEST_ALIGNMENT on the stack, so still do stack alignment.  */
15350 && ! cfun->calls_alloca)
15352 offsets->outgoing_args = offsets->soft_frame;
15353 offsets->locals_base = offsets->soft_frame;
15357 /* Ensure SFP has the correct alignment. */
15358 if (ARM_DOUBLEWORD_ALIGN
15359 && (offsets->soft_frame & 7))
15361 offsets->soft_frame += 4;
/* Try to align the stack by pushing an extra reg.  Don't bother doing this
15363 when there is a stack frame as the alignment will be rolled into
15364 the normal stack adjustment. */
15365 if (frame_size + crtl->outgoing_args_size == 0)
15369 /* If it is safe to use r3, then do so. This sometimes
15370 generates better code on Thumb-2 by avoiding the need to
15371 use 32-bit push/pop instructions. */
15372 if (! any_sibcall_uses_r3 ()
15373 && arm_size_return_regs () <= 12
15374 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15379 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15381 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15390 offsets->saved_regs += 4;
15391 offsets->saved_regs_mask |= (1 << reg);
15396 offsets->locals_base = offsets->soft_frame + frame_size;
15397 offsets->outgoing_args = (offsets->locals_base
15398 + crtl->outgoing_args_size);
15400 if (ARM_DOUBLEWORD_ALIGN)
15402 /* Ensure SP remains doubleword aligned. */
15403 if (offsets->outgoing_args & 7)
15404 offsets->outgoing_args += 4;
15405 gcc_assert (!(offsets->outgoing_args & 7));
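/* (For instance, an outgoing_args offset of 52 is bumped to 56 by the
   test above, keeping SP a multiple of 8 at call boundaries.)  */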
15412 /* Calculate the relative offsets for the different stack pointers. Positive
15413 offsets are in the direction of stack growth. */
15416 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15418 arm_stack_offsets *offsets;
15420 offsets = arm_get_frame_offsets ();
15422 /* OK, now we have enough information to compute the distances.
15423 There must be an entry in these switch tables for each pair
15424 of registers in ELIMINABLE_REGS, even if some of the entries
15425 seem to be redundant or useless. */
15428 case ARG_POINTER_REGNUM:
15431 case THUMB_HARD_FRAME_POINTER_REGNUM:
15434 case FRAME_POINTER_REGNUM:
15435 /* This is the reverse of the soft frame pointer
15436 to hard frame pointer elimination below. */
15437 return offsets->soft_frame - offsets->saved_args;
15439 case ARM_HARD_FRAME_POINTER_REGNUM:
15440 /* This is only non-zero in the case where the static chain register
15441 is stored above the frame. */
15442 return offsets->frame - offsets->saved_args - 4;
15444 case STACK_POINTER_REGNUM:
15445 /* If nothing has been pushed on the stack at all
15446 then this will return -4. This *is* correct! */
15447 return offsets->outgoing_args - (offsets->saved_args + 4);
15450 gcc_unreachable ();
15452 gcc_unreachable ();
15454 case FRAME_POINTER_REGNUM:
15457 case THUMB_HARD_FRAME_POINTER_REGNUM:
15460 case ARM_HARD_FRAME_POINTER_REGNUM:
15461 /* The hard frame pointer points to the top entry in the
stack frame.  The soft frame pointer points to the bottom entry
15463 in the stack frame. If there is no stack frame at all,
15464 then they are identical. */
15466 return offsets->frame - offsets->soft_frame;
15468 case STACK_POINTER_REGNUM:
15469 return offsets->outgoing_args - offsets->soft_frame;
15472 gcc_unreachable ();
15474 gcc_unreachable ();
15477 /* You cannot eliminate from the stack pointer.
15478 In theory you could eliminate from the hard frame
15479 pointer to the stack pointer, but this will never
15480 happen, since if a stack frame is not needed the
15481 hard frame pointer will never be used. */
15482 gcc_unreachable ();
15486 /* Given FROM and TO register numbers, say whether this elimination is
15487 allowed. Frame pointer elimination is automatically handled.
15489 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15490 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15491 pointer, we must eliminate FRAME_POINTER_REGNUM into
15492 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15493 ARG_POINTER_REGNUM. */
15496 arm_can_eliminate (const int from, const int to)
15498 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15499 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15500 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
(to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
true);
15505 /* Emit RTL to save coprocessor registers on function entry. Returns the
15506 number of bytes pushed. */
arm_save_coproc_regs (void)
15511 int saved_size = 0;
15513 unsigned start_reg;
15516 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15517 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15519 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15520 insn = gen_rtx_MEM (V2SImode, insn);
15521 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15522 RTX_FRAME_RELATED_P (insn) = 1;
/* Save any floating point call-saved registers used by this
   function.  */
15528 if (TARGET_FPA_EMU2)
15530 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15531 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15533 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15534 insn = gen_rtx_MEM (XFmode, insn);
15535 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15536 RTX_FRAME_RELATED_P (insn) = 1;
15542 start_reg = LAST_FPA_REGNUM;
15544 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15546 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15548 if (start_reg - reg == 3)
15550 insn = emit_sfm (reg, 4);
15551 RTX_FRAME_RELATED_P (insn) = 1;
15553 start_reg = reg - 1;
15558 if (start_reg != reg)
15560 insn = emit_sfm (reg + 1, start_reg - reg);
15561 RTX_FRAME_RELATED_P (insn) = 1;
15562 saved_size += (start_reg - reg) * 12;
15564 start_reg = reg - 1;
15568 if (start_reg != reg)
15570 insn = emit_sfm (reg + 1, start_reg - reg);
15571 saved_size += (start_reg - reg) * 12;
15572 RTX_FRAME_RELATED_P (insn) = 1;
15575 if (TARGET_HARD_FLOAT && TARGET_VFP)
15577 start_reg = FIRST_VFP_REGNUM;
15579 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15581 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15582 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15584 if (start_reg != reg)
15585 saved_size += vfp_emit_fstmd (start_reg,
15586 (reg - start_reg) / 2);
15587 start_reg = reg + 2;
15590 if (start_reg != reg)
15591 saved_size += vfp_emit_fstmd (start_reg,
15592 (reg - start_reg) / 2);
15598 /* Set the Thumb frame pointer from the stack pointer. */
15601 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15603 HOST_WIDE_INT amount;
15606 amount = offsets->outgoing_args - offsets->locals_base;
15608 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15609 stack_pointer_rtx, GEN_INT (amount)));
15612 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15613 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15614 expects the first two operands to be the same. */
15617 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15619 hard_frame_pointer_rtx));
15623 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15624 hard_frame_pointer_rtx,
15625 stack_pointer_rtx));
15627 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15628 plus_constant (stack_pointer_rtx, amount));
15629 RTX_FRAME_RELATED_P (dwarf) = 1;
15630 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15633 RTX_FRAME_RELATED_P (insn) = 1;
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
15639 arm_expand_prologue (void)
15644 unsigned long live_regs_mask;
15645 unsigned long func_type;
15647 int saved_pretend_args = 0;
15648 int saved_regs = 0;
15649 unsigned HOST_WIDE_INT args_to_push;
15650 arm_stack_offsets *offsets;
15652 func_type = arm_current_func_type ();
15654 /* Naked functions don't have prologues. */
15655 if (IS_NAKED (func_type))
15658 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15659 args_to_push = crtl->args.pretend_args_size;
15661 /* Compute which registers we will have to save onto the stack. */
15662 offsets = arm_get_frame_offsets ();
15663 live_regs_mask = offsets->saved_regs_mask;
15665 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15667 if (IS_STACKALIGN (func_type))
15672 /* Handle a word-aligned stack pointer. We generate the following:
15674 mov r0, sp
15675 bic r1, r0, #7
15676 mov sp, r1
15677 <save and restore r0 in normal prologue/epilogue>
15678 mov sp, r0
15679 bx lr
15681 The unwinder doesn't need to know about the stack realignment.
15682 Just tell it we saved SP in r0. */
15683 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15685 r0 = gen_rtx_REG (SImode, 0);
15686 r1 = gen_rtx_REG (SImode, 1);
15687 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15688 compiler won't choke. */
15689 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15690 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15691 insn = gen_movsi (r0, stack_pointer_rtx);
15692 RTX_FRAME_RELATED_P (insn) = 1;
15693 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15695 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15696 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15699 /* For APCS frames, if IP register is clobbered
15700 when creating frame, save that register in a special
15702 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15704 if (IS_INTERRUPT (func_type))
15706 /* Interrupt functions must not corrupt any registers.
15707 Creating a frame pointer however, corrupts the IP
15708 register, so we must push it first. */
15709 insn = emit_multi_reg_push (1 << IP_REGNUM);
15711 /* Do not set RTX_FRAME_RELATED_P on this insn.
15712 The dwarf stack unwinding code only wants to see one
15713 stack decrement per function, and this is not it. If
15714 this instruction is labeled as being part of the frame
15715 creation sequence then dwarf2out_frame_debug_expr will
15716 die when it encounters the assignment of IP to FP
15717 later on, since the use of SP here establishes SP as
15718 the CFA register and not IP.
15720 Anyway this instruction is not really part of the stack
15721 frame creation although it is part of the prologue. */
15723 else if (IS_NESTED (func_type))
15725 /* The Static chain register is the same as the IP register
15726 used as a scratch register during stack frame creation.
15727 To get around this need to find somewhere to store IP
15728 whilst the frame is being created. We try the following
15731 1. The last argument register.
15732 2. A slot on the stack above the frame. (This only
15733 works if the function is not a varargs function).
15734 3. Register r3, after pushing the argument registers
15737 Note - we only need to tell the dwarf2 backend about the SP
15738 adjustment in the second variant; the static chain register
15739 doesn't need to be unwound, as it doesn't contain a value
15740 inherited from the caller. */
15742 if (df_regs_ever_live_p (3) == false)
15743 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15744 else if (args_to_push == 0)
15748 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15751 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15752 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15755 /* Just tell the dwarf backend that we adjusted SP. */
15756 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15757 plus_constant (stack_pointer_rtx,
15759 RTX_FRAME_RELATED_P (insn) = 1;
15760 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15764 /* Store the args on the stack. */
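/* For illustration of the mask computed below: it selects the trailing
   argument registers out of r0-r3 that hold the stacked varargs words.
   E.g. with args_to_push == 8 (two words),
   (0xf0 >> (8 / 4)) & 0xf == 0xc, i.e. the bits for r2 and r3, so only
   the last two argument registers are pushed.  */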
15765 if (cfun->machine->uses_anonymous_args)
15766 insn = emit_multi_reg_push
15767 ((0xf0 >> (args_to_push / 4)) & 0xf);
15770 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15771 GEN_INT (- args_to_push)));
15773 RTX_FRAME_RELATED_P (insn) = 1;
15775 saved_pretend_args = 1;
15776 fp_offset = args_to_push;
15779 /* Now reuse r3 to preserve IP. */
15780 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15784 insn = emit_set_insn (ip_rtx,
15785 plus_constant (stack_pointer_rtx, fp_offset));
15786 RTX_FRAME_RELATED_P (insn) = 1;
15791 /* Push the argument registers, or reserve space for them. */
15792 if (cfun->machine->uses_anonymous_args)
15793 insn = emit_multi_reg_push
15794 ((0xf0 >> (args_to_push / 4)) & 0xf);
15797 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15798 GEN_INT (- args_to_push)));
15799 RTX_FRAME_RELATED_P (insn) = 1;
15802 /* If this is an interrupt service routine, and the link register
15803 is going to be pushed, and we're not generating the extra
15804 push of IP (needed when a frame is created under the APCS frame layout),
15805 then subtracting four from LR now means that the function return
15806 can be done with a single instruction. */
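/* A sketch of why this helps (assuming the usual ARM exception model,
   where the return address of an IRQ/FIQ handler is LR - 4): with the
   adjustment folded in before LR is pushed, the epilogue can restore
   everything and return with a single

	ldmfd	sp!, {..., pc}^

   instead of popping LR and then executing a separate
   "subs pc, lr, #4".  */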
15807 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15808 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15809 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15812 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15814 emit_set_insn (lr, plus_constant (lr, -4));
15817 if (live_regs_mask)
15819 saved_regs += bit_count (live_regs_mask) * 4;
15820 if (optimize_size && !frame_pointer_needed
15821 && saved_regs == offsets->saved_regs - offsets->saved_args)
15823 /* If no coprocessor registers are being pushed and we don't have
15824 to worry about a frame pointer then push extra registers to
15825 create the stack frame. This is done in a way that does not
15826 alter the frame layout, so is independent of the epilogue. */
15830 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15832 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15833 if (frame && n * 4 >= frame)
15836 live_regs_mask |= (1 << n) - 1;
15837 saved_regs += frame;
15840 insn = emit_multi_reg_push (live_regs_mask);
15841 RTX_FRAME_RELATED_P (insn) = 1;
15844 if (! IS_VOLATILE (func_type))
15845 saved_regs += arm_save_coproc_regs ();
15847 if (frame_pointer_needed && TARGET_ARM)
15849 /* Create the new frame pointer. */
15850 if (TARGET_APCS_FRAME)
15852 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15853 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15854 RTX_FRAME_RELATED_P (insn) = 1;
15856 if (IS_NESTED (func_type))
15858 /* Recover the static chain register. */
15859 if (!df_regs_ever_live_p (3)
15860 || saved_pretend_args)
15861 insn = gen_rtx_REG (SImode, 3);
15862 else /* if (crtl->args.pretend_args_size == 0) */
15864 insn = plus_constant (hard_frame_pointer_rtx, 4);
15865 insn = gen_frame_mem (SImode, insn);
15867 emit_set_insn (ip_rtx, insn);
15868 /* Add a USE to stop propagate_one_insn() from barfing. */
15869 emit_insn (gen_prologue_use (ip_rtx));
15874 insn = GEN_INT (saved_regs - 4);
15875 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15876 stack_pointer_rtx, insn));
15877 RTX_FRAME_RELATED_P (insn) = 1;
15881 if (flag_stack_usage_info)
15882 current_function_static_stack_size
15883 = offsets->outgoing_args - offsets->saved_args;
15885 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15887 /* This add can produce multiple insns for a large constant, so we
15888 need to get tricky. */
15889 rtx last = get_last_insn ();
15891 amount = GEN_INT (offsets->saved_args + saved_regs
15892 - offsets->outgoing_args);
15894 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15898 last = last ? NEXT_INSN (last) : get_insns ();
15899 RTX_FRAME_RELATED_P (last) = 1;
15901 while (last != insn);
15903 /* If the frame pointer is needed, emit a special barrier that
15904 will prevent the scheduler from moving stores to the frame
15905 before the stack adjustment. */
15906 if (frame_pointer_needed)
15907 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15908 hard_frame_pointer_rtx));
15912 if (frame_pointer_needed && TARGET_THUMB2)
15913 thumb_set_frame_pointer (offsets);
15915 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15917 unsigned long mask;
15919 mask = live_regs_mask;
15920 mask &= THUMB2_WORK_REGS;
15921 if (!IS_NESTED (func_type))
15922 mask |= (1 << IP_REGNUM);
15923 arm_load_pic_register (mask);
15926 /* If we are profiling, make sure no instructions are scheduled before
15927 the call to mcount. Similarly if the user has requested no
15928 scheduling in the prolog. Similarly if we want non-call exceptions
15929 using the EABI unwinder, to prevent faulting instructions from being
15930 swapped with a stack adjustment. */
15931 if (crtl->profile || !TARGET_SCHED_PROLOG
15932 || (arm_except_unwind_info (&global_options) == UI_TARGET
15933 && cfun->can_throw_non_call_exceptions))
15934 emit_insn (gen_blockage ());
15936 /* If the link register is being kept alive, with the return address in it,
15937 then make sure that it does not get reused by the ce2 pass. */
15938 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15939 cfun->machine->lr_save_eliminated = 1;
15942 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15944 arm_print_condition (FILE *stream)
15946 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15948 /* Branch conversion is not implemented for Thumb-2. */
15951 output_operand_lossage ("predicated Thumb instruction");
15954 if (current_insn_predicate != NULL)
15956 output_operand_lossage
15957 ("predicated instruction in conditional sequence");
15961 fputs (arm_condition_codes[arm_current_cc], stream);
15963 else if (current_insn_predicate)
15965 enum arm_cond_code code;
15969 output_operand_lossage ("predicated Thumb instruction");
15973 code = get_arm_condition_code (current_insn_predicate);
15974 fputs (arm_condition_codes[code], stream);
15979 /* If CODE is 'd', then the X is a condition operand and the instruction
15980 should only be executed if the condition is true.
15981 if CODE is 'D', then the X is a condition operand and the instruction
15982 should only be executed if the condition is false: however, if the mode
15983 of the comparison is CCFPEmode, then always execute the instruction -- we
15984 do this because in these circumstances !GE does not necessarily imply LT;
15985 in these cases the instruction pattern will take care to make sure that
15986 an instruction containing %d will follow, thereby undoing the effects of
15987 doing this instruction unconditionally.
15988 If CODE is 'N' then X is a floating point operand that must be negated before output.
15990 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15991 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
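/* Some illustrative examples, derived from the cases below (not an
   exhaustive list): for x == (const_int 5), '%B' prints -6 (the bitwise
   inverse, sign-extended); '%Q' on a 64-bit constant prints the low word
   as an immediate and '%R' the high word; '%M' on (reg:DI r4) prints
   "{r4-r5}".  */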
15993 arm_print_operand (FILE *stream, rtx x, int code)
15998 fputs (ASM_COMMENT_START, stream);
16002 fputs (user_label_prefix, stream);
16006 fputs (REGISTER_PREFIX, stream);
16010 arm_print_condition (stream);
16014 /* Nothing in unified syntax, otherwise the current condition code. */
16015 if (!TARGET_UNIFIED_ASM)
16016 arm_print_condition (stream);
16020 /* The current condition code in unified syntax, otherwise nothing. */
16021 if (TARGET_UNIFIED_ASM)
16022 arm_print_condition (stream);
16026 /* The current condition code for a condition code setting instruction.
16027 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16028 if (TARGET_UNIFIED_ASM)
16030 fputc('s', stream);
16031 arm_print_condition (stream);
16035 arm_print_condition (stream);
16036 fputc('s', stream);
16041 /* If the instruction is conditionally executed then print
16042 the current condition code, otherwise print 's'. */
16043 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16044 if (current_insn_predicate)
16045 arm_print_condition (stream);
16047 fputc('s', stream);
16050 /* %# is a "break" sequence. It doesn't output anything, but is used to
16051 separate e.g. operand numbers from following text, if that text consists
16052 of further digits which we don't want to be part of the operand number. */
16060 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16061 r = real_value_negate (&r);
16062 fprintf (stream, "%s", fp_const_from_val (&r));
16066 /* An integer or symbol address without a preceding # sign. */
16068 switch (GET_CODE (x))
16071 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16075 output_addr_const (stream, x);
16079 gcc_unreachable ();
16084 if (GET_CODE (x) == CONST_INT)
16087 val = ARM_SIGN_EXTEND (~INTVAL (x));
16088 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16092 putc ('~', stream);
16093 output_addr_const (stream, x);
16098 /* The low 16 bits of an immediate constant. */
16099 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16103 fprintf (stream, "%s", arithmetic_instr (x, 1));
16106 /* Truncate Cirrus shift counts. */
16108 if (GET_CODE (x) == CONST_INT)
16110 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16113 arm_print_operand (stream, x, 0);
16117 fprintf (stream, "%s", arithmetic_instr (x, 0));
16125 if (!shift_operator (x, SImode))
16127 output_operand_lossage ("invalid shift operand");
16131 shift = shift_op (x, &val);
16135 fprintf (stream, ", %s ", shift);
16137 arm_print_operand (stream, XEXP (x, 1), 0);
16139 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16144 /* An explanation of the 'Q', 'R' and 'H' register operands:
16146 In a pair of registers containing a DI or DF value the 'Q'
16147 operand returns the register number of the register containing
16148 the least significant part of the value. The 'R' operand returns
16149 the register number of the register containing the most
16150 significant part of the value.
16152 The 'H' operand returns the higher of the two register numbers.
16153 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16154 same as the 'Q' operand, since the most significant part of the
16155 value is held in the lower number register. The reverse is true
16156 on systems where WORDS_BIG_ENDIAN is false.
16158 The purpose of these operands is to distinguish between cases
16159 where the endian-ness of the values is important (for example
16160 when they are added together), and cases where the endian-ness
16161 is irrelevant, but the order of register operations is important.
16162 For example when loading a value from memory into a register
16163 pair, the endian-ness does not matter. Provided that the value
16164 from the lower memory address is put into the lower numbered
16165 register, and the value from the higher address is put into the
16166 higher numbered register, the load will work regardless of whether
16167 the value being loaded is big-wordian or little-wordian. The
16168 order of the two register loads can matter however, if the address
16169 of the memory location is actually held in one of the registers
16170 being overwritten by the load.
16172 The 'Q' and 'R' constraints are also available for 64-bit constants. */
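/* A worked example: for a DImode value in the pair {r0, r1}, on a
   target where WORDS_BIG_ENDIAN is false '%Q' prints r0 (the least
   significant word), '%R' prints r1 (the most significant word) and
   '%H' prints r1 (the higher register number).  With WORDS_BIG_ENDIAN,
   '%Q' and '%R' swap, while '%H' still prints r1.  */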
16175 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16177 rtx part = gen_lowpart (SImode, x);
16178 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16182 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16184 output_operand_lossage ("invalid operand for code '%c'", code);
16188 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16192 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16194 enum machine_mode mode = GET_MODE (x);
16197 if (mode == VOIDmode)
16199 part = gen_highpart_mode (SImode, mode, x);
16200 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16204 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16206 output_operand_lossage ("invalid operand for code '%c'", code);
16210 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16214 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16216 output_operand_lossage ("invalid operand for code '%c'", code);
16220 asm_fprintf (stream, "%r", REGNO (x) + 1);
16224 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16226 output_operand_lossage ("invalid operand for code '%c'", code);
16230 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16234 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16236 output_operand_lossage ("invalid operand for code '%c'", code);
16240 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16244 asm_fprintf (stream, "%r",
16245 GET_CODE (XEXP (x, 0)) == REG
16246 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16250 asm_fprintf (stream, "{%r-%r}",
16252 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16255 /* Like 'M', but writing doubleword vector registers, for use by Neon insns. */
16259 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16260 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16262 asm_fprintf (stream, "{d%d}", regno);
16264 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16269 /* CONST_TRUE_RTX means always -- that's the default. */
16270 if (x == const_true_rtx)
16273 if (!COMPARISON_P (x))
16275 output_operand_lossage ("invalid operand for code '%c'", code);
16279 fputs (arm_condition_codes[get_arm_condition_code (x)],
16284 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16285 want to do that. */
16286 if (x == const_true_rtx)
16288 output_operand_lossage ("instruction never executed");
16291 if (!COMPARISON_P (x))
16293 output_operand_lossage ("invalid operand for code '%c'", code);
16297 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16298 (get_arm_condition_code (x))],
16302 /* Cirrus registers can be accessed in a variety of ways:
16303 single floating point (f)
16304 double floating point (d)
16305 32bit integer (fx)
16306 64bit integer (dx). */
16307 case 'W': /* Cirrus register in F mode. */
16308 case 'X': /* Cirrus register in D mode. */
16309 case 'Y': /* Cirrus register in FX mode. */
16310 case 'Z': /* Cirrus register in DX mode. */
16311 gcc_assert (GET_CODE (x) == REG
16312 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16314 fprintf (stream, "mv%s%s",
16316 : code == 'X' ? "d"
16317 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16321 /* Print a Cirrus register, selecting the f/d/fx/dx variant from the register's mode. */
16324 int mode = GET_MODE (x);
16326 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16328 output_operand_lossage ("invalid operand for code '%c'", code);
16332 fprintf (stream, "mv%s%s",
16333 mode == DFmode ? "d"
16334 : mode == SImode ? "fx"
16335 : mode == DImode ? "dx"
16336 : "f", reg_names[REGNO (x)] + 2);
16342 if (GET_CODE (x) != REG
16343 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16344 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16345 /* Bad value for wCG register number. */
16347 output_operand_lossage ("invalid operand for code '%c'", code);
16352 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16355 /* Print an iWMMXt control register name. */
16357 if (GET_CODE (x) != CONST_INT
16359 || INTVAL (x) >= 16)
16360 /* Bad value for wC register number. */
16362 output_operand_lossage ("invalid operand for code '%c'", code);
16368 static const char * wc_reg_names [16] =
16370 "wCID", "wCon", "wCSSF", "wCASF",
16371 "wC4", "wC5", "wC6", "wC7",
16372 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16373 "wC12", "wC13", "wC14", "wC15"
16376 fputs (wc_reg_names [INTVAL (x)], stream);
16380 /* Print the high single-precision register of a VFP double-precision register. */
16384 int mode = GET_MODE (x);
16387 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16389 output_operand_lossage ("invalid operand for code '%c'", code);
16394 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16396 output_operand_lossage ("invalid operand for code '%c'", code);
16400 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16404 /* Print a VFP/Neon double precision or quad precision register name. */
16408 int mode = GET_MODE (x);
16409 int is_quad = (code == 'q');
16412 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16414 output_operand_lossage ("invalid operand for code '%c'", code);
16418 if (GET_CODE (x) != REG
16419 || !IS_VFP_REGNUM (REGNO (x)))
16421 output_operand_lossage ("invalid operand for code '%c'", code);
16426 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16427 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16429 output_operand_lossage ("invalid operand for code '%c'", code);
16433 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16434 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16438 /* These two codes print the low/high doubleword register of a Neon quad
16439 register, respectively. For pair-structure types, can also print
16440 low/high quadword registers. */
16444 int mode = GET_MODE (x);
16447 if ((GET_MODE_SIZE (mode) != 16
16448 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16450 output_operand_lossage ("invalid operand for code '%c'", code);
16455 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16457 output_operand_lossage ("invalid operand for code '%c'", code);
16461 if (GET_MODE_SIZE (mode) == 16)
16462 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16463 + (code == 'f' ? 1 : 0));
16465 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16466 + (code == 'f' ? 1 : 0));
16470 /* Print a VFPv3 floating-point constant, represented as an integer index. */
16474 int index = vfp3_const_double_index (x);
16475 gcc_assert (index != -1);
16476 fprintf (stream, "%d", index);
16480 /* Print bits representing opcode features for Neon.
16482 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16483 and polynomials as unsigned.
16485 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16487 Bit 2 is 1 for rounding functions, 0 otherwise. */
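/* For reference, the resulting mapping of (bits & 3) under the first
   code below is:
     0 -> 'u' (unsigned integer)
     1 -> 's' (signed integer)
     2 -> 'p' (polynomial)
     3 -> 'f' (float)
   so e.g. bits == 6 (binary 110) prints 'p', and since bit 2 is also
   set, the rounding code further down prints "r".  */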
16489 /* Identify the type as 's', 'u', 'p' or 'f'. */
16492 HOST_WIDE_INT bits = INTVAL (x);
16493 fputc ("uspf"[bits & 3], stream);
16497 /* Likewise, but signed and unsigned integers are both 'i'. */
16500 HOST_WIDE_INT bits = INTVAL (x);
16501 fputc ("iipf"[bits & 3], stream);
16505 /* As for 'T', but emit 'u' instead of 'p'. */
16508 HOST_WIDE_INT bits = INTVAL (x);
16509 fputc ("usuf"[bits & 3], stream);
16513 /* Bit 2: rounding (vs none). */
16516 HOST_WIDE_INT bits = INTVAL (x);
16517 fputs ((bits & 4) != 0 ? "r" : "", stream);
16521 /* Memory operand for vld1/vst1 instruction. */
16525 bool postinc = FALSE;
16526 unsigned align, memsize, align_bits;
16528 gcc_assert (GET_CODE (x) == MEM);
16529 addr = XEXP (x, 0);
16530 if (GET_CODE (addr) == POST_INC)
16533 addr = XEXP (addr, 0);
16535 asm_fprintf (stream, "[%r", REGNO (addr));
16537 /* We know the alignment of this access, so we can emit a hint in the
16538 instruction (for some alignments) as an aid to the memory subsystem of the target. */
16540 align = MEM_ALIGN (x) >> 3;
16541 memsize = INTVAL (MEM_SIZE (x));
16543 /* Only certain alignment specifiers are supported by the hardware. */
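/* A worked example (assuming the usual NEON alignment hints of 64, 128
   and 256 bits): a 16-byte access whose address is known to be 32-byte
   aligned is printed with a ":256" qualifier, allowing

	vld1.64	{d0, d1}, [r0:256]

   while an access with only 4-byte alignment matches none of the cases
   below and gets no hint at all.  */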
16544 if (memsize == 16 && (align % 32) == 0)
16546 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
16548 else if ((align % 8) == 0)
16553 if (align_bits != 0)
16554 asm_fprintf (stream, ":%d", align_bits);
16556 asm_fprintf (stream, "]");
16559 fputs("!", stream);
16567 gcc_assert (GET_CODE (x) == MEM);
16568 addr = XEXP (x, 0);
16569 gcc_assert (GET_CODE (addr) == REG);
16570 asm_fprintf (stream, "[%r]", REGNO (addr));
16574 /* Translate an S register number into a D register number and element index. */
16577 int mode = GET_MODE (x);
16580 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16582 output_operand_lossage ("invalid operand for code '%c'", code);
16587 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16589 output_operand_lossage ("invalid operand for code '%c'", code);
16593 regno = regno - FIRST_VFP_REGNUM;
16594 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16598 /* Register specifier for vld1.16/vst1.16. Translate the S register
16599 number into a D register number and element index. */
16602 int mode = GET_MODE (x);
16605 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16607 output_operand_lossage ("invalid operand for code '%c'", code);
16612 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16614 output_operand_lossage ("invalid operand for code '%c'", code);
16618 regno = regno - FIRST_VFP_REGNUM;
16619 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16626 output_operand_lossage ("missing operand");
16630 switch (GET_CODE (x))
16633 asm_fprintf (stream, "%r", REGNO (x));
16637 output_memory_reference_mode = GET_MODE (x);
16638 output_address (XEXP (x, 0));
16645 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16646 sizeof (fpstr), 0, 1);
16647 fprintf (stream, "#%s", fpstr);
16650 fprintf (stream, "#%s", fp_immediate_constant (x));
16654 gcc_assert (GET_CODE (x) != NEG);
16655 fputc ('#', stream);
16656 if (GET_CODE (x) == HIGH)
16658 fputs (":lower16:", stream);
16662 output_addr_const (stream, x);
16668 /* Target hook for printing a memory address. */
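/* Some examples of the forms printed below (assuming SImode accesses):
     (reg r0)                      -> [r0, #0]
     (plus (reg r0) (const_int 4)) -> [r0, #4]
     (pre_dec (reg r0))            -> [r0, #-4]!
     (post_inc (reg r0))           -> [r0], #4  */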
16670 arm_print_operand_address (FILE *stream, rtx x)
16674 int is_minus = GET_CODE (x) == MINUS;
16676 if (GET_CODE (x) == REG)
16677 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16678 else if (GET_CODE (x) == PLUS || is_minus)
16680 rtx base = XEXP (x, 0);
16681 rtx index = XEXP (x, 1);
16682 HOST_WIDE_INT offset = 0;
16683 if (GET_CODE (base) != REG
16684 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16686 /* Ensure that BASE is a register. */
16687 /* (one of them must be). */
16688 /* Also ensure the SP is not used as an index register. */
16693 switch (GET_CODE (index))
16696 offset = INTVAL (index);
16699 asm_fprintf (stream, "[%r, #%wd]",
16700 REGNO (base), offset);
16704 asm_fprintf (stream, "[%r, %s%r]",
16705 REGNO (base), is_minus ? "-" : "",
16715 asm_fprintf (stream, "[%r, %s%r",
16716 REGNO (base), is_minus ? "-" : "",
16717 REGNO (XEXP (index, 0)));
16718 arm_print_operand (stream, index, 'S');
16719 fputs ("]", stream);
16724 gcc_unreachable ();
16727 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16728 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16730 extern enum machine_mode output_memory_reference_mode;
16732 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16734 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16735 asm_fprintf (stream, "[%r, #%s%d]!",
16736 REGNO (XEXP (x, 0)),
16737 GET_CODE (x) == PRE_DEC ? "-" : "",
16738 GET_MODE_SIZE (output_memory_reference_mode));
16740 asm_fprintf (stream, "[%r], #%s%d",
16741 REGNO (XEXP (x, 0)),
16742 GET_CODE (x) == POST_DEC ? "-" : "",
16743 GET_MODE_SIZE (output_memory_reference_mode));
16745 else if (GET_CODE (x) == PRE_MODIFY)
16747 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16748 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16749 asm_fprintf (stream, "#%wd]!",
16750 INTVAL (XEXP (XEXP (x, 1), 1)));
16752 asm_fprintf (stream, "%r]!",
16753 REGNO (XEXP (XEXP (x, 1), 1)));
16755 else if (GET_CODE (x) == POST_MODIFY)
16757 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16758 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16759 asm_fprintf (stream, "#%wd",
16760 INTVAL (XEXP (XEXP (x, 1), 1)));
16762 asm_fprintf (stream, "%r",
16763 REGNO (XEXP (XEXP (x, 1), 1)));
16765 else output_addr_const (stream, x);
16769 if (GET_CODE (x) == REG)
16770 asm_fprintf (stream, "[%r]", REGNO (x));
16771 else if (GET_CODE (x) == POST_INC)
16772 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16773 else if (GET_CODE (x) == PLUS)
16775 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16776 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16777 asm_fprintf (stream, "[%r, #%wd]",
16778 REGNO (XEXP (x, 0)),
16779 INTVAL (XEXP (x, 1)));
16781 asm_fprintf (stream, "[%r, %r]",
16782 REGNO (XEXP (x, 0)),
16783 REGNO (XEXP (x, 1)));
16786 output_addr_const (stream, x);
16790 /* Target hook for indicating whether a punctuation character for
16791 TARGET_PRINT_OPERAND is valid. */
16793 arm_print_operand_punct_valid_p (unsigned char code)
16795 return (code == '@' || code == '|' || code == '.'
16796 || code == '(' || code == ')' || code == '#'
16797 || (TARGET_32BIT && (code == '?'))
16798 || (TARGET_THUMB2 && (code == '!'))
16799 || (TARGET_THUMB && (code == '_')));
16802 /* Target hook for assembling integer objects. The ARM version needs to
16803 handle word-sized values specially. */
16805 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16807 enum machine_mode mode;
16809 if (size == UNITS_PER_WORD && aligned_p)
16811 fputs ("\t.word\t", asm_out_file);
16812 output_addr_const (asm_out_file, x);
16814 /* Mark symbols as position independent. We only do this in the
16815 .text segment, not in the .data segment. */
16816 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16817 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16819 /* See legitimize_pic_address for an explanation of the
16820 TARGET_VXWORKS_RTP check. */
16821 if (TARGET_VXWORKS_RTP
16822 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16823 fputs ("(GOT)", asm_out_file);
16825 fputs ("(GOTOFF)", asm_out_file);
16827 fputc ('\n', asm_out_file);
16831 mode = GET_MODE (x);
16833 if (arm_vector_mode_supported_p (mode))
16837 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16839 units = CONST_VECTOR_NUNITS (x);
16840 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16842 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16843 for (i = 0; i < units; i++)
16845 rtx elt = CONST_VECTOR_ELT (x, i);
16847 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16850 for (i = 0; i < units; i++)
16852 rtx elt = CONST_VECTOR_ELT (x, i);
16853 REAL_VALUE_TYPE rval;
16855 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16858 (rval, GET_MODE_INNER (mode),
16859 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16865 return default_assemble_integer (x, size, aligned_p);
16869 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16873 if (!TARGET_AAPCS_BASED)
16876 default_named_section_asm_out_constructor
16877 : default_named_section_asm_out_destructor) (symbol, priority);
16881 /* Put these in the .init_array section, using a special relocation. */
16882 if (priority != DEFAULT_INIT_PRIORITY)
16885 sprintf (buf, "%s.%.5u",
16886 is_ctor ? ".init_array" : ".fini_array",
16888 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16895 switch_to_section (s);
16896 assemble_align (POINTER_SIZE);
16897 fputs ("\t.word\t", asm_out_file);
16898 output_addr_const (asm_out_file, symbol);
16899 fputs ("(target1)\n", asm_out_file);
16902 /* Add a function to the list of static constructors. */
16905 arm_elf_asm_constructor (rtx symbol, int priority)
16907 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16910 /* Add a function to the list of static destructors. */
16913 arm_elf_asm_destructor (rtx symbol, int priority)
16915 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16918 /* A finite state machine takes care of noticing whether or not instructions
16919 can be conditionally executed, and thus decrease execution time and code
16920 size by deleting branch instructions. The fsm is controlled by
16921 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16923 /* The states of the fsm controlling condition codes are:
16924 0: normal, do nothing special
16925 1: make ASM_OUTPUT_OPCODE not output this instruction
16926 2: make ASM_OUTPUT_OPCODE not output this instruction
16927 3: make instructions conditional
16928 4: make instructions conditional
16930 State transitions (state->state by whom under condition):
16931 0 -> 1 final_prescan_insn if the `target' is a label
16932 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16933 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16934 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16935 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16936 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16937 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16938 (the target insn is arm_target_insn).
16940 If the jump clobbers the conditions then we use states 2 and 4.
16942 A similar thing can be done with conditional return insns.
16944 XXX In case the `target' is an unconditional branch, this conditionalising
16945 of the instructions always reduces code size, but not always execution
16946 time. But then, I want to reduce the code size to somewhere near what
16947 /bin/cc produces. */
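/* For example (a sketch of the simplest case), the fsm turns

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into

	cmp	r0, #0
	addne	r1, r1, #1

   by suppressing the branch and conditionalizing the skipped insn.  */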
16949 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16950 instructions. When a COND_EXEC instruction is seen the subsequent
16951 instructions are scanned so that multiple conditional instructions can be
16952 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16953 specify the length and true/false mask for the IT block. These will be
16954 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
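/* For example, if arm_current_cc is ARM_EQ, arm_condexec_masklen is 3
   and arm_condexec_mask is 5 (binary 101: the first and third insns use
   EQ, the second the inverse condition), thumb2_asm_output_opcode emits

	itet	eq

   ahead of the three insns, which then execute as eq, ne, eq.  */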
16956 /* Returns the index of the ARM condition code string in
16957 `arm_condition_codes'. COMPARISON should be an rtx like
16958 `(eq (...) (...))'. */
16959 static enum arm_cond_code
16960 get_arm_condition_code (rtx comparison)
16962 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16963 enum arm_cond_code code;
16964 enum rtx_code comp_code = GET_CODE (comparison);
16966 if (GET_MODE_CLASS (mode) != MODE_CC)
16967 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16968 XEXP (comparison, 1));
16972 case CC_DNEmode: code = ARM_NE; goto dominance;
16973 case CC_DEQmode: code = ARM_EQ; goto dominance;
16974 case CC_DGEmode: code = ARM_GE; goto dominance;
16975 case CC_DGTmode: code = ARM_GT; goto dominance;
16976 case CC_DLEmode: code = ARM_LE; goto dominance;
16977 case CC_DLTmode: code = ARM_LT; goto dominance;
16978 case CC_DGEUmode: code = ARM_CS; goto dominance;
16979 case CC_DGTUmode: code = ARM_HI; goto dominance;
16980 case CC_DLEUmode: code = ARM_LS; goto dominance;
16981 case CC_DLTUmode: code = ARM_CC;
16984 gcc_assert (comp_code == EQ || comp_code == NE);
16986 if (comp_code == EQ)
16987 return ARM_INVERSE_CONDITION_CODE (code);
16993 case NE: return ARM_NE;
16994 case EQ: return ARM_EQ;
16995 case GE: return ARM_PL;
16996 case LT: return ARM_MI;
16997 default: gcc_unreachable ();
17003 case NE: return ARM_NE;
17004 case EQ: return ARM_EQ;
17005 default: gcc_unreachable ();
17011 case NE: return ARM_MI;
17012 case EQ: return ARM_PL;
17013 default: gcc_unreachable ();
17018 /* These encodings assume that AC=1 in the FPA system control
17019 byte. This allows us to handle all cases except UNEQ and LTGT. */
17023 case GE: return ARM_GE;
17024 case GT: return ARM_GT;
17025 case LE: return ARM_LS;
17026 case LT: return ARM_MI;
17027 case NE: return ARM_NE;
17028 case EQ: return ARM_EQ;
17029 case ORDERED: return ARM_VC;
17030 case UNORDERED: return ARM_VS;
17031 case UNLT: return ARM_LT;
17032 case UNLE: return ARM_LE;
17033 case UNGT: return ARM_HI;
17034 case UNGE: return ARM_PL;
17035 /* UNEQ and LTGT do not have a representation. */
17036 case UNEQ: /* Fall through. */
17037 case LTGT: /* Fall through. */
17038 default: gcc_unreachable ();
17044 case NE: return ARM_NE;
17045 case EQ: return ARM_EQ;
17046 case GE: return ARM_LE;
17047 case GT: return ARM_LT;
17048 case LE: return ARM_GE;
17049 case LT: return ARM_GT;
17050 case GEU: return ARM_LS;
17051 case GTU: return ARM_CC;
17052 case LEU: return ARM_CS;
17053 case LTU: return ARM_HI;
17054 default: gcc_unreachable ();
17060 case LTU: return ARM_CS;
17061 case GEU: return ARM_CC;
17062 default: gcc_unreachable ();
17068 case NE: return ARM_NE;
17069 case EQ: return ARM_EQ;
17070 case GEU: return ARM_CS;
17071 case GTU: return ARM_HI;
17072 case LEU: return ARM_LS;
17073 case LTU: return ARM_CC;
17074 default: gcc_unreachable ();
17080 case GE: return ARM_GE;
17081 case LT: return ARM_LT;
17082 case GEU: return ARM_CS;
17083 case LTU: return ARM_CC;
17084 default: gcc_unreachable ();
17090 case NE: return ARM_NE;
17091 case EQ: return ARM_EQ;
17092 case GE: return ARM_GE;
17093 case GT: return ARM_GT;
17094 case LE: return ARM_LE;
17095 case LT: return ARM_LT;
17096 case GEU: return ARM_CS;
17097 case GTU: return ARM_HI;
17098 case LEU: return ARM_LS;
17099 case LTU: return ARM_CC;
17100 default: gcc_unreachable ();
17103 default: gcc_unreachable ();
17107 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed instructions. */
17110 thumb2_final_prescan_insn (rtx insn)
17112 rtx first_insn = insn;
17113 rtx body = PATTERN (insn);
17115 enum arm_cond_code code;
17119 /* Remove the previous insn from the count of insns to be output. */
17120 if (arm_condexec_count)
17121 arm_condexec_count--;
17123 /* Nothing to do if we are already inside a conditional block. */
17124 if (arm_condexec_count)
17127 if (GET_CODE (body) != COND_EXEC)
17130 /* Conditional jumps are implemented directly. */
17131 if (GET_CODE (insn) == JUMP_INSN)
17134 predicate = COND_EXEC_TEST (body);
17135 arm_current_cc = get_arm_condition_code (predicate);
17137 n = get_attr_ce_count (insn);
17138 arm_condexec_count = 1;
17139 arm_condexec_mask = (1 << n) - 1;
17140 arm_condexec_masklen = n;
17141 /* See if subsequent instructions can be combined into the same block. */
17144 insn = next_nonnote_insn (insn);
17146 /* Jumping into the middle of an IT block is illegal, so a label or
17147 barrier terminates the block. */
17148 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17151 body = PATTERN (insn);
17152 /* USE and CLOBBER aren't really insns, so just skip them. */
17153 if (GET_CODE (body) == USE
17154 || GET_CODE (body) == CLOBBER)
17157 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17158 if (GET_CODE (body) != COND_EXEC)
17160 /* Allow up to 4 conditionally executed instructions in a block. */
17161 n = get_attr_ce_count (insn);
17162 if (arm_condexec_masklen + n > 4)
17165 predicate = COND_EXEC_TEST (body);
17166 code = get_arm_condition_code (predicate);
17167 mask = (1 << n) - 1;
17168 if (arm_current_cc == code)
17169 arm_condexec_mask |= (mask << arm_condexec_masklen);
17170 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17173 arm_condexec_count++;
17174 arm_condexec_masklen += n;
17176 /* A jump must be the last instruction in a conditional block. */
17177 if (GET_CODE(insn) == JUMP_INSN)
17180 /* Restore recog_data (getting the attributes of other insns can
17181 destroy this array, but final.c assumes that it remains intact
17182 across this call). */
17183 extract_constrain_insn_cached (first_insn);
17187 arm_final_prescan_insn (rtx insn)
17189 /* BODY will hold the body of INSN. */
17190 rtx body = PATTERN (insn);
17192 /* This will be 1 if trying to repeat the trick, and things need to be
17193 reversed if it appears to fail. */
17196 /* If we start with a return insn, we only succeed if we find another one. */
17197 int seeking_return = 0;
17199 /* START_INSN will hold the insn from where we start looking. This is the
17200 first insn after the following code_label if REVERSE is true. */
17201 rtx start_insn = insn;
17203 /* If in state 4, check if the target branch is reached, in order to
17204 change back to state 0. */
17205 if (arm_ccfsm_state == 4)
17207 if (insn == arm_target_insn)
17209 arm_target_insn = NULL;
17210 arm_ccfsm_state = 0;
17215 /* If in state 3, it is possible to repeat the trick, if this insn is an
17216 unconditional branch to a label, and immediately following this branch
17217 is the previous target label which is only used once, and the label this
17218 branch jumps to is not too far off. */
17219 if (arm_ccfsm_state == 3)
17221 if (simplejump_p (insn))
17223 start_insn = next_nonnote_insn (start_insn);
17224 if (GET_CODE (start_insn) == BARRIER)
17226 /* XXX Isn't this always a barrier? */
17227 start_insn = next_nonnote_insn (start_insn);
17229 if (GET_CODE (start_insn) == CODE_LABEL
17230 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17231 && LABEL_NUSES (start_insn) == 1)
17236 else if (GET_CODE (body) == RETURN)
17238 start_insn = next_nonnote_insn (start_insn);
17239 if (GET_CODE (start_insn) == BARRIER)
17240 start_insn = next_nonnote_insn (start_insn);
17241 if (GET_CODE (start_insn) == CODE_LABEL
17242 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17243 && LABEL_NUSES (start_insn) == 1)
17246 seeking_return = 1;
17255 gcc_assert (!arm_ccfsm_state || reverse);
17256 if (GET_CODE (insn) != JUMP_INSN)
17259 /* This jump might be paralleled with a clobber of the condition codes;
17260 the jump should always come first. */
17261 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17262 body = XVECEXP (body, 0, 0);
17265 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17266 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17269 int fail = FALSE, succeed = FALSE;
17270 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17271 int then_not_else = TRUE;
17272 rtx this_insn = start_insn, label = 0;
17274 /* Register the insn jumped to. */
17277 if (!seeking_return)
17278 label = XEXP (SET_SRC (body), 0);
17280 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17281 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17282 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17284 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17285 then_not_else = FALSE;
17287 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17288 seeking_return = 1;
17289 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17291 seeking_return = 1;
17292 then_not_else = FALSE;
17295 gcc_unreachable ();
17297 /* See how many insns this branch skips, and what kind of insns. If all
17298 insns are okay, and the label or unconditional branch to the same
17299 label is not too far away, succeed. */
17300 for (insns_skipped = 0;
17301 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17305 this_insn = next_nonnote_insn (this_insn);
17309 switch (GET_CODE (this_insn))
17312 /* Succeed if it is the target label, otherwise fail since
17313 control falls in from somewhere else. */
17314 if (this_insn == label)
17316 arm_ccfsm_state = 1;
17324 /* Succeed if the following insn is the target label.
17326 If return insns are used then the last insn in a function
17327 will be a barrier. */
17328 this_insn = next_nonnote_insn (this_insn);
17329 if (this_insn && this_insn == label)
17331 arm_ccfsm_state = 1;
17339 /* The AAPCS says that conditional calls should not be
17340 used since they make interworking inefficient (the
17341 linker can't transform BL<cond> into BLX). That's
17342 only a problem if the machine has BLX. */
17349 /* Succeed if the following insn is the target label, or
17350 if the following two insns are a barrier and the target label. */
17352 this_insn = next_nonnote_insn (this_insn);
17353 if (this_insn && GET_CODE (this_insn) == BARRIER)
17354 this_insn = next_nonnote_insn (this_insn);
17356 if (this_insn && this_insn == label
17357 && insns_skipped < max_insns_skipped)
17359 arm_ccfsm_state = 1;
17367 /* If this is an unconditional branch to the same label, succeed.
17368 If it is to another label, do nothing. If it is conditional, fail. */
17370 /* XXX Probably, the tests for SET and the PC are unnecessary. */
17373 scanbody = PATTERN (this_insn);
17374 if (GET_CODE (scanbody) == SET
17375 && GET_CODE (SET_DEST (scanbody)) == PC)
17377 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17378 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17380 arm_ccfsm_state = 2;
17383 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17386 /* Fail if a conditional return is undesirable (e.g. on a
17387 StrongARM), but still allow this if optimizing for size. */
17388 else if (GET_CODE (scanbody) == RETURN
17389 && !use_return_insn (TRUE, NULL)
17392 else if (GET_CODE (scanbody) == RETURN
17395 arm_ccfsm_state = 2;
17398 else if (GET_CODE (scanbody) == PARALLEL)
17400 switch (get_attr_conds (this_insn))
17410 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17415 /* Instructions using or affecting the condition codes make it fail. */
17417 scanbody = PATTERN (this_insn);
17418 if (!(GET_CODE (scanbody) == SET
17419 || GET_CODE (scanbody) == PARALLEL)
17420 || get_attr_conds (this_insn) != CONDS_NOCOND)
17423 /* A conditional cirrus instruction must be followed by
17424 a non Cirrus instruction. However, since we
17425 conditionalize instructions in this function and by
17426 the time we get here we can't add instructions
17427 (nops), because shorten_branches() has already been
17428 called, we will disable conditionalizing Cirrus
17429 instructions to be safe. */
17430 if (GET_CODE (scanbody) != USE
17431 && GET_CODE (scanbody) != CLOBBER
17432 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17442 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17443 arm_target_label = CODE_LABEL_NUMBER (label);
17446 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17448 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17450 this_insn = next_nonnote_insn (this_insn);
17451 gcc_assert (!this_insn
17452 || (GET_CODE (this_insn) != BARRIER
17453 && GET_CODE (this_insn) != CODE_LABEL));
17457 /* Oh, dear! We ran off the end... give up. */
17458 extract_constrain_insn_cached (insn);
17459 arm_ccfsm_state = 0;
17460 arm_target_insn = NULL;
17463 arm_target_insn = this_insn;
17466 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from what it was. */
17469 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17471 if (reverse || then_not_else)
17472 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17475 /* Restore recog_data (getting the attributes of other insns can
17476 destroy this array, but final.c assumes that it remains intact
17477 across this call). */
17478 extract_constrain_insn_cached (insn);
17482 /* Output IT instructions. */
17484 thumb2_asm_output_opcode (FILE * stream)
17489 if (arm_condexec_mask)
17491 for (n = 0; n < arm_condexec_masklen; n++)
17492 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17494 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17495 arm_condition_codes[arm_current_cc]);
17496 arm_condexec_mask = 0;
17500 /* Returns true if REGNO is a valid register
17501 for holding a quantity of type MODE. */
17503 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17505 if (GET_MODE_CLASS (mode) == MODE_CC)
17506 return (regno == CC_REGNUM
17507 || (TARGET_HARD_FLOAT && TARGET_VFP
17508 && regno == VFPCC_REGNUM));
17511 /* For the Thumb we only allow values bigger than SImode in
17512 registers 0 - 6, so that there is always a second low
17513 register available to hold the upper part of the value.
17514 We probably ought to ensure that the register is the
17515 start of an even numbered register pair. */
17516 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17518 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17519 && IS_CIRRUS_REGNUM (regno))
17520 /* We have outlawed SI values in Cirrus registers because they
17521 reside in the lower 32 bits, but SF values reside in the
17522 upper 32 bits. This causes gcc all sorts of grief. We can't
17523 even split the registers into pairs because Cirrus SI values
17524 get sign extended to 64bits-- aldyh. */
17525 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17527 if (TARGET_HARD_FLOAT && TARGET_VFP
17528 && IS_VFP_REGNUM (regno))
17530 if (mode == SFmode || mode == SImode)
17531 return VFP_REGNO_OK_FOR_SINGLE (regno);
17533 if (mode == DFmode)
17534 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17536 /* VFP registers can hold HFmode values, but there is no point in
17537 putting them there unless we have hardware conversion insns. */
17538 if (mode == HFmode)
17539 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17542 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17543 || (VALID_NEON_QREG_MODE (mode)
17544 && NEON_REGNO_OK_FOR_QUAD (regno))
17545 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17546 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17547 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17548 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17549 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17554 if (TARGET_REALLY_IWMMXT)
17556 if (IS_IWMMXT_GR_REGNUM (regno))
17557 return mode == SImode;
17559 if (IS_IWMMXT_REGNUM (regno))
17560 return VALID_IWMMXT_REG_MODE (mode);
17563 /* We allow almost any value to be stored in the general registers.
17564 Restrict doubleword quantities to even register pairs so that we can
17565 use ldrd. Do not allow very large Neon structure opaque modes in
17566 general registers; they would use too many. */
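/* For instance, with TARGET_LDRD a DImode value may live in {r0, r1}
   (even base register) but not in {r1, r2}, since ldrd/strd require an
   even/odd pair; and an XImode Neon structure (16 words) is rejected
   here because ARM_NUM_REGS exceeds 4.  */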
17567 if (regno <= LAST_ARM_REGNUM)
17568 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17569 && ARM_NUM_REGS (mode) <= 4;
17571 if (regno == FRAME_POINTER_REGNUM
17572 || regno == ARG_POINTER_REGNUM)
17573 /* We only allow integers in the fake hard registers. */
17574 return GET_MODE_CLASS (mode) == MODE_INT;
17576 /* The only registers left are the FPA registers
17577 which we only allow to hold FP values. */
17578 return (TARGET_HARD_FLOAT && TARGET_FPA
17579 && GET_MODE_CLASS (mode) == MODE_FLOAT
17580 && regno >= FIRST_FPA_REGNUM
17581 && regno <= LAST_FPA_REGNUM);
17584 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17585 not used in arm mode. */
17588 arm_regno_class (int regno)
17592 if (regno == STACK_POINTER_REGNUM)
17594 if (regno == CC_REGNUM)
17601 if (TARGET_THUMB2 && regno < 8)
17604 if ( regno <= LAST_ARM_REGNUM
17605 || regno == FRAME_POINTER_REGNUM
17606 || regno == ARG_POINTER_REGNUM)
17607 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17609 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17610 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17612 if (IS_CIRRUS_REGNUM (regno))
17613 return CIRRUS_REGS;
17615 if (IS_VFP_REGNUM (regno))
17617 if (regno <= D7_VFP_REGNUM)
17618 return VFP_D0_D7_REGS;
17619 else if (regno <= LAST_LO_VFP_REGNUM)
17620 return VFP_LO_REGS;
17622 return VFP_HI_REGS;
17625 if (IS_IWMMXT_REGNUM (regno))
17626 return IWMMXT_REGS;
17628 if (IS_IWMMXT_GR_REGNUM (regno))
17629 return IWMMXT_GR_REGS;
17634 /* Handle a special case when computing the offset
17635 of an argument from the frame pointer. */
17637 arm_debugger_arg_offset (int value, rtx addr)
17641 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
17645 /* We can only cope with the case where the address is held in a register. */
17646 if (GET_CODE (addr) != REG)
17649 /* If we are using the frame pointer to point at the argument, then
17650 an offset of 0 is correct. */
17651 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17654 /* If we are using the stack pointer to point at the
17655 argument, then an offset of 0 is correct. */
17656 /* ??? Check this is consistent with thumb2 frame layout. */
17657 if ((TARGET_THUMB || !frame_pointer_needed)
17658 && REGNO (addr) == SP_REGNUM)
17661 /* Oh dear. The argument is pointed to by a register rather
17662 than being held in a register, or being stored at a known
17663 offset from the frame pointer. Since GDB only understands
17664 those two kinds of argument we must translate the address
17665 held in the register into an offset from the frame pointer.
17666 We do this by searching through the insns for the function
17667 looking to see where this register gets its value. If the
17668 register is initialized from the frame pointer plus an offset
17669 then we are in luck and we can continue, otherwise we give up.
17671 This code is exercised by producing debugging information
17672 for a function with arguments like this:
17674 double func (double a, double b, int c, double d) {return d;}
17676 Without this code the stab for parameter 'd' will be set to
17677 an offset of 0 from the frame pointer, rather than 8. */
17679 /* The if() statement says:
17681 If the insn is a normal instruction
17682 and if the insn is setting the value in a register
17683 and if the register being set is the register holding the address of the argument
17684 and if the address is computed by an addition
17685 that involves adding to a register
17686 which is the frame pointer
17691 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17693 if ( GET_CODE (insn) == INSN
17694 && GET_CODE (PATTERN (insn)) == SET
17695 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17696 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17697 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17698 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17699 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17702 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17711 warning (0, "unable to compute real location of stacked parameter");
17712 value = 8; /* XXX magic hack */
17732 T_MAX /* Size of enum. Keep last. */
17733 } neon_builtin_type_mode;
17735 #define TYPE_MODE_BIT(X) (1 << (X))
17737 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
17738 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
17739 | TYPE_MODE_BIT (T_DI))
17740 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
17741 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
17742 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
17744 #define v8qi_UP T_V8QI
17745 #define v4hi_UP T_V4HI
17746 #define v2si_UP T_V2SI
17747 #define v2sf_UP T_V2SF
17749 #define v16qi_UP T_V16QI
17750 #define v8hi_UP T_V8HI
17751 #define v4si_UP T_V4SI
17752 #define v4sf_UP T_V4SF
17753 #define v2di_UP T_V2DI
17758 #define UP(X) X##_UP
17791 NEON_LOADSTRUCTLANE,
17793 NEON_STORESTRUCTLANE,
17802 const neon_itype itype;
17803 const neon_builtin_type_mode mode;
17804 const enum insn_code code;
17805 unsigned int fcode;
17806 } neon_builtin_datum;
17808 #define CF(N,X) CODE_FOR_neon_##N##X
17810 #define VAR1(T, N, A) \
17811 {#N, NEON_##T, UP (A), CF (N, A), 0}
17812 #define VAR2(T, N, A, B) \
17813 VAR1 (T, N, A), \
17814 {#N, NEON_##T, UP (B), CF (N, B), 0}
17815 #define VAR3(T, N, A, B, C) \
17816 VAR2 (T, N, A, B), \
17817 {#N, NEON_##T, UP (C), CF (N, C), 0}
17818 #define VAR4(T, N, A, B, C, D) \
17819 VAR3 (T, N, A, B, C), \
17820 {#N, NEON_##T, UP (D), CF (N, D), 0}
17821 #define VAR5(T, N, A, B, C, D, E) \
17822 VAR4 (T, N, A, B, C, D), \
17823 {#N, NEON_##T, UP (E), CF (N, E), 0}
17824 #define VAR6(T, N, A, B, C, D, E, F) \
17825 VAR5 (T, N, A, B, C, D, E), \
17826 {#N, NEON_##T, UP (F), CF (N, F), 0}
17827 #define VAR7(T, N, A, B, C, D, E, F, G) \
17828 VAR6 (T, N, A, B, C, D, E, F), \
17829 {#N, NEON_##T, UP (G), CF (N, G), 0}
17830 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17831 VAR7 (T, N, A, B, C, D, E, F, G), \
17832 {#N, NEON_##T, UP (H), CF (N, H), 0}
17833 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17834 VAR8 (T, N, A, B, C, D, E, F, G, H), \
17835 {#N, NEON_##T, UP (I), CF (N, I), 0}
17836 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17837 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
17838 {#N, NEON_##T, UP (J), CF (N, J), 0}
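/* As an illustration of the macros above (hypothetical use; the real
   table entry for vaddl uses VAR3),

     VAR2 (BINOP, vaddl, v8qi, v4hi)

   expands to the two table entries

     {"vaddl", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddlv8qi, 0},
     {"vaddl", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddlv4hi, 0}

   one per "key" mode of the instruction.  */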
17840 /* The mode entries in the following table correspond to the "key" type of the
17841 instruction variant, i.e. equivalent to that which would be specified after
17842 the assembler mnemonic, which usually refers to the last vector operand.
17843 (Signed/unsigned/polynomial types are not differentiated between though, and
17844 are all mapped onto the same mode for a given element size.) The modes
17845 listed per instruction should be the same as those defined for that
17846 instruction's pattern in neon.md. */
17848 static neon_builtin_datum neon_builtin_data[] =
17850 VAR10 (BINOP, vadd,
17851 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17852 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
17853 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
17854 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17855 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17856 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
17857 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17858 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17859 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
17860 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17861 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
17862 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
17863 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
17864 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
17865 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
17866 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
17867 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
17868 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
17869 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
17870 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
17871 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
17872 VAR2 (BINOP, vqdmull, v4hi, v2si),
17873 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17874 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17875 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17876 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
17877 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
17878 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
17879 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17880 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17881 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17882 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
17883 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17884 VAR10 (BINOP, vsub,
17885 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17886 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
17887 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
17888 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17889 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17890 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
17891 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17892 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17893 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17894 VAR2 (BINOP, vcage, v2sf, v4sf),
17895 VAR2 (BINOP, vcagt, v2sf, v4sf),
17896 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17897 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17898 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
17899 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17900 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
17901 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17902 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17903 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
17904 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17905 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17906 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
17907 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
17908 VAR2 (BINOP, vrecps, v2sf, v4sf),
17909 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
17910 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17911 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17912 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17913 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17914 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17915 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17916 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17917 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17918 VAR2 (UNOP, vcnt, v8qi, v16qi),
17919 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
17920 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
17921 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17922 /* FIXME: vget_lane supports more variants than this! */
17923 VAR10 (GETLANE, vget_lane,
17924 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17925 VAR10 (SETLANE, vset_lane,
17926 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17927 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
17928 VAR10 (DUP, vdup_n,
17929 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17930 VAR10 (DUPLANE, vdup_lane,
17931 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17932 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
17933 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
17934 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
17935 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
17936 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
17937 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
17938 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
17939 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17940 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17941 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
17942 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
17943 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17944 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
17945 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
17946 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17947 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17948 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
17949 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
17950 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17951 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
17952 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
17953 VAR10 (BINOP, vext,
17954 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17955 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17956 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
17957 VAR2 (UNOP, vrev16, v8qi, v16qi),
17958 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
17959 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
17960 VAR10 (SELECT, vbsl,
17961 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17962 VAR1 (VTBL, vtbl1, v8qi),
17963 VAR1 (VTBL, vtbl2, v8qi),
17964 VAR1 (VTBL, vtbl3, v8qi),
17965 VAR1 (VTBL, vtbl4, v8qi),
17966 VAR1 (VTBX, vtbx1, v8qi),
17967 VAR1 (VTBX, vtbx2, v8qi),
17968 VAR1 (VTBX, vtbx3, v8qi),
17969 VAR1 (VTBX, vtbx4, v8qi),
17970 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17971 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17972 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17973 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
17974 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
17975 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
17976 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
17977 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
17978 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
17979 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
17980 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
17981 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
17982 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
17983 VAR10 (LOAD1, vld1,
17984 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17985 VAR10 (LOAD1LANE, vld1_lane,
17986 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17987 VAR10 (LOAD1, vld1_dup,
17988 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17989 VAR10 (STORE1, vst1,
17990 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17991 VAR10 (STORE1LANE, vst1_lane,
17992 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17993 VAR9 (LOADSTRUCT,
17994 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17995 VAR7 (LOADSTRUCTLANE, vld2_lane,
17996 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17997 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
17998 VAR9 (STORESTRUCT, vst2,
17999 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18000 VAR7 (STORESTRUCTLANE, vst2_lane,
18001 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18002 VAR9 (LOADSTRUCT,
18003 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18004 VAR7 (LOADSTRUCTLANE, vld3_lane,
18005 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18006 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18007 VAR9 (STORESTRUCT, vst3,
18008 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18009 VAR7 (STORESTRUCTLANE, vst3_lane,
18010 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18011 VAR9 (LOADSTRUCT, vld4,
18012 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18013 VAR7 (LOADSTRUCTLANE, vld4_lane,
18014 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18015 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18016 VAR9 (STORESTRUCT, vst4,
18017 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18018 VAR7 (STORESTRUCTLANE, vst4_lane,
18019 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18020 VAR10 (LOGICBINOP, vand,
18021 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18022 VAR10 (LOGICBINOP, vorr,
18023 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18024 VAR10 (BINOP, veor,
18025 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18026 VAR10 (LOGICBINOP, vbic,
18027 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18028 VAR10 (LOGICBINOP, vorn,
18029 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18030 };
18032 #undef CF
18033 #undef VAR1
18034 #undef VAR2
18035 #undef VAR3
18036 #undef VAR4
18037 #undef VAR5
18038 #undef VAR6
18039 #undef VAR7
18040 #undef VAR8
18041 #undef VAR9
18042 #undef VAR10
18044 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18045 symbolic names defined here (which would require too much duplication).
18046 FIXME? */
18047 enum arm_builtins
18048 {
18049 ARM_BUILTIN_GETWCX,
18050 ARM_BUILTIN_SETWCX,
18052 ARM_BUILTIN_WZERO,
18054 ARM_BUILTIN_WAVG2BR,
18055 ARM_BUILTIN_WAVG2HR,
18056 ARM_BUILTIN_WAVG2B,
18057 ARM_BUILTIN_WAVG2H,
18059 ARM_BUILTIN_WACCB,
18060 ARM_BUILTIN_WACCH,
18061 ARM_BUILTIN_WACCW,
18063 ARM_BUILTIN_WMACS,
18064 ARM_BUILTIN_WMACSZ,
18065 ARM_BUILTIN_WMACU,
18066 ARM_BUILTIN_WMACUZ,
18068 ARM_BUILTIN_WSADB,
18069 ARM_BUILTIN_WSADBZ,
18070 ARM_BUILTIN_WSADH,
18071 ARM_BUILTIN_WSADHZ,
18073 ARM_BUILTIN_WALIGN,
18075 ARM_BUILTIN_TMIA,
18076 ARM_BUILTIN_TMIAPH,
18077 ARM_BUILTIN_TMIABB,
18078 ARM_BUILTIN_TMIABT,
18079 ARM_BUILTIN_TMIATB,
18080 ARM_BUILTIN_TMIATT,
18082 ARM_BUILTIN_TMOVMSKB,
18083 ARM_BUILTIN_TMOVMSKH,
18084 ARM_BUILTIN_TMOVMSKW,
18086 ARM_BUILTIN_TBCSTB,
18087 ARM_BUILTIN_TBCSTH,
18088 ARM_BUILTIN_TBCSTW,
18090 ARM_BUILTIN_WMADDS,
18091 ARM_BUILTIN_WMADDU,
18093 ARM_BUILTIN_WPACKHSS,
18094 ARM_BUILTIN_WPACKWSS,
18095 ARM_BUILTIN_WPACKDSS,
18096 ARM_BUILTIN_WPACKHUS,
18097 ARM_BUILTIN_WPACKWUS,
18098 ARM_BUILTIN_WPACKDUS,
18100 ARM_BUILTIN_WADDB,
18101 ARM_BUILTIN_WADDH,
18102 ARM_BUILTIN_WADDW,
18103 ARM_BUILTIN_WADDSSB,
18104 ARM_BUILTIN_WADDSSH,
18105 ARM_BUILTIN_WADDSSW,
18106 ARM_BUILTIN_WADDUSB,
18107 ARM_BUILTIN_WADDUSH,
18108 ARM_BUILTIN_WADDUSW,
18109 ARM_BUILTIN_WSUBB,
18110 ARM_BUILTIN_WSUBH,
18111 ARM_BUILTIN_WSUBW,
18112 ARM_BUILTIN_WSUBSSB,
18113 ARM_BUILTIN_WSUBSSH,
18114 ARM_BUILTIN_WSUBSSW,
18115 ARM_BUILTIN_WSUBUSB,
18116 ARM_BUILTIN_WSUBUSH,
18117 ARM_BUILTIN_WSUBUSW,
18119 ARM_BUILTIN_WAND,
18120 ARM_BUILTIN_WANDN,
18121 ARM_BUILTIN_WOR,
18122 ARM_BUILTIN_WXOR,
18124 ARM_BUILTIN_WCMPEQB,
18125 ARM_BUILTIN_WCMPEQH,
18126 ARM_BUILTIN_WCMPEQW,
18127 ARM_BUILTIN_WCMPGTUB,
18128 ARM_BUILTIN_WCMPGTUH,
18129 ARM_BUILTIN_WCMPGTUW,
18130 ARM_BUILTIN_WCMPGTSB,
18131 ARM_BUILTIN_WCMPGTSH,
18132 ARM_BUILTIN_WCMPGTSW,
18134 ARM_BUILTIN_TEXTRMSB,
18135 ARM_BUILTIN_TEXTRMSH,
18136 ARM_BUILTIN_TEXTRMSW,
18137 ARM_BUILTIN_TEXTRMUB,
18138 ARM_BUILTIN_TEXTRMUH,
18139 ARM_BUILTIN_TEXTRMUW,
18140 ARM_BUILTIN_TINSRB,
18141 ARM_BUILTIN_TINSRH,
18142 ARM_BUILTIN_TINSRW,
18144 ARM_BUILTIN_WMAXSW,
18145 ARM_BUILTIN_WMAXSH,
18146 ARM_BUILTIN_WMAXSB,
18147 ARM_BUILTIN_WMAXUW,
18148 ARM_BUILTIN_WMAXUH,
18149 ARM_BUILTIN_WMAXUB,
18150 ARM_BUILTIN_WMINSW,
18151 ARM_BUILTIN_WMINSH,
18152 ARM_BUILTIN_WMINSB,
18153 ARM_BUILTIN_WMINUW,
18154 ARM_BUILTIN_WMINUH,
18155 ARM_BUILTIN_WMINUB,
18157 ARM_BUILTIN_WMULUM,
18158 ARM_BUILTIN_WMULSM,
18159 ARM_BUILTIN_WMULUL,
18161 ARM_BUILTIN_PSADBH,
18162 ARM_BUILTIN_WSHUFH,
18164 ARM_BUILTIN_WSLLH,
18165 ARM_BUILTIN_WSLLW,
18166 ARM_BUILTIN_WSLLD,
18167 ARM_BUILTIN_WSRAH,
18168 ARM_BUILTIN_WSRAW,
18169 ARM_BUILTIN_WSRAD,
18170 ARM_BUILTIN_WSRLH,
18171 ARM_BUILTIN_WSRLW,
18172 ARM_BUILTIN_WSRLD,
18173 ARM_BUILTIN_WRORH,
18174 ARM_BUILTIN_WRORW,
18175 ARM_BUILTIN_WRORD,
18176 ARM_BUILTIN_WSLLHI,
18177 ARM_BUILTIN_WSLLWI,
18178 ARM_BUILTIN_WSLLDI,
18179 ARM_BUILTIN_WSRAHI,
18180 ARM_BUILTIN_WSRAWI,
18181 ARM_BUILTIN_WSRADI,
18182 ARM_BUILTIN_WSRLHI,
18183 ARM_BUILTIN_WSRLWI,
18184 ARM_BUILTIN_WSRLDI,
18185 ARM_BUILTIN_WRORHI,
18186 ARM_BUILTIN_WRORWI,
18187 ARM_BUILTIN_WRORDI,
18189 ARM_BUILTIN_WUNPCKIHB,
18190 ARM_BUILTIN_WUNPCKIHH,
18191 ARM_BUILTIN_WUNPCKIHW,
18192 ARM_BUILTIN_WUNPCKILB,
18193 ARM_BUILTIN_WUNPCKILH,
18194 ARM_BUILTIN_WUNPCKILW,
18196 ARM_BUILTIN_WUNPCKEHSB,
18197 ARM_BUILTIN_WUNPCKEHSH,
18198 ARM_BUILTIN_WUNPCKEHSW,
18199 ARM_BUILTIN_WUNPCKEHUB,
18200 ARM_BUILTIN_WUNPCKEHUH,
18201 ARM_BUILTIN_WUNPCKEHUW,
18202 ARM_BUILTIN_WUNPCKELSB,
18203 ARM_BUILTIN_WUNPCKELSH,
18204 ARM_BUILTIN_WUNPCKELSW,
18205 ARM_BUILTIN_WUNPCKELUB,
18206 ARM_BUILTIN_WUNPCKELUH,
18207 ARM_BUILTIN_WUNPCKELUW,
18209 ARM_BUILTIN_THREAD_POINTER,
18211 ARM_BUILTIN_NEON_BASE,
18213 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18214 };
18216 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
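/* Layout sketch: the Neon builtin created from neon_builtin_data[i] is
   given function code ARM_BUILTIN_NEON_BASE + i, so the expander below
   can recover its table entry as fcode - ARM_BUILTIN_NEON_BASE.  */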
18218 static void
18219 arm_init_neon_builtins (void)
18220 {
18221 unsigned int i, fcode;
18222 tree decl;
18224 tree neon_intQI_type_node;
18225 tree neon_intHI_type_node;
18226 tree neon_polyQI_type_node;
18227 tree neon_polyHI_type_node;
18228 tree neon_intSI_type_node;
18229 tree neon_intDI_type_node;
18230 tree neon_float_type_node;
18232 tree intQI_pointer_node;
18233 tree intHI_pointer_node;
18234 tree intSI_pointer_node;
18235 tree intDI_pointer_node;
18236 tree float_pointer_node;
18238 tree const_intQI_node;
18239 tree const_intHI_node;
18240 tree const_intSI_node;
18241 tree const_intDI_node;
18242 tree const_float_node;
18244 tree const_intQI_pointer_node;
18245 tree const_intHI_pointer_node;
18246 tree const_intSI_pointer_node;
18247 tree const_intDI_pointer_node;
18248 tree const_float_pointer_node;
18250 tree V8QI_type_node;
18251 tree V4HI_type_node;
18252 tree V2SI_type_node;
18253 tree V2SF_type_node;
18254 tree V16QI_type_node;
18255 tree V8HI_type_node;
18256 tree V4SI_type_node;
18257 tree V4SF_type_node;
18258 tree V2DI_type_node;
18260 tree intUQI_type_node;
18261 tree intUHI_type_node;
18262 tree intUSI_type_node;
18263 tree intUDI_type_node;
18265 tree intEI_type_node;
18266 tree intOI_type_node;
18267 tree intCI_type_node;
18268 tree intXI_type_node;
18270 tree V8QI_pointer_node;
18271 tree V4HI_pointer_node;
18272 tree V2SI_pointer_node;
18273 tree V2SF_pointer_node;
18274 tree V16QI_pointer_node;
18275 tree V8HI_pointer_node;
18276 tree V4SI_pointer_node;
18277 tree V4SF_pointer_node;
18278 tree V2DI_pointer_node;
18280 tree void_ftype_pv8qi_v8qi_v8qi;
18281 tree void_ftype_pv4hi_v4hi_v4hi;
18282 tree void_ftype_pv2si_v2si_v2si;
18283 tree void_ftype_pv2sf_v2sf_v2sf;
18284 tree void_ftype_pdi_di_di;
18285 tree void_ftype_pv16qi_v16qi_v16qi;
18286 tree void_ftype_pv8hi_v8hi_v8hi;
18287 tree void_ftype_pv4si_v4si_v4si;
18288 tree void_ftype_pv4sf_v4sf_v4sf;
18289 tree void_ftype_pv2di_v2di_v2di;
18291 tree reinterp_ftype_dreg[5][5];
18292 tree reinterp_ftype_qreg[5][5];
18293 tree dreg_types[5], qreg_types[5];
18295 /* Create distinguished type nodes for NEON vector element types,
18296 and pointers to values of such types, so we can detect them later. */
18297 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18298 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18299 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18300 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18301 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18302 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18303 neon_float_type_node = make_node (REAL_TYPE);
18304 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18305 layout_type (neon_float_type_node);
18307 /* Define typedefs which exactly correspond to the modes we are basing vector
18308 types on. If you change these names you'll need to change
18309 the table used by arm_mangle_type too. */
18310 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18311 "__builtin_neon_qi");
18312 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18313 "__builtin_neon_hi");
18314 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18315 "__builtin_neon_si");
18316 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18317 "__builtin_neon_sf");
18318 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18319 "__builtin_neon_di");
18320 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18321 "__builtin_neon_poly8");
18322 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18323 "__builtin_neon_poly16");
18325 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18326 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18327 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18328 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18329 float_pointer_node = build_pointer_type (neon_float_type_node);
18331 /* Next create constant-qualified versions of the above types. */
18332 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18333 TYPE_QUAL_CONST);
18334 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18335 TYPE_QUAL_CONST);
18336 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18337 TYPE_QUAL_CONST);
18338 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18339 TYPE_QUAL_CONST);
18340 const_float_node = build_qualified_type (neon_float_type_node,
18341 TYPE_QUAL_CONST);
18343 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18344 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18345 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18346 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18347 const_float_pointer_node = build_pointer_type (const_float_node);
18349 /* Now create vector types based on our NEON element types. */
18350 /* 64-bit vectors. */
18351 V8QI_type_node =
18352 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18353 V4HI_type_node =
18354 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18355 V2SI_type_node =
18356 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18357 V2SF_type_node =
18358 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18359 /* 128-bit vectors. */
18360 V16QI_type_node =
18361 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18362 V8HI_type_node =
18363 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18364 V4SI_type_node =
18365 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18366 V4SF_type_node =
18367 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18368 V2DI_type_node =
18369 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18371 /* Unsigned integer types for various mode sizes. */
18372 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18373 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18374 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18375 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18377 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18378 "__builtin_neon_uqi");
18379 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18380 "__builtin_neon_uhi");
18381 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18382 "__builtin_neon_usi");
18383 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18384 "__builtin_neon_udi");
18386 /* Opaque integer types for structures of vectors. */
18387 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18388 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18389 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18390 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18392 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18393 "__builtin_neon_ti");
18394 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18395 "__builtin_neon_ei");
18396 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18397 "__builtin_neon_oi");
18398 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18399 "__builtin_neon_ci");
18400 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18401 "__builtin_neon_xi");
18403 /* Pointers to vector types. */
18404 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18405 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18406 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18407 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18408 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18409 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18410 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18411 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18412 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18414 /* Operations which return results as pairs. */
18415 void_ftype_pv8qi_v8qi_v8qi =
18416 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18417 V8QI_type_node, NULL);
18418 void_ftype_pv4hi_v4hi_v4hi =
18419 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18420 V4HI_type_node, NULL);
18421 void_ftype_pv2si_v2si_v2si =
18422 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18423 V2SI_type_node, NULL);
18424 void_ftype_pv2sf_v2sf_v2sf =
18425 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18426 V2SF_type_node, NULL);
18427 void_ftype_pdi_di_di =
18428 build_function_type_list (void_type_node, intDI_pointer_node,
18429 neon_intDI_type_node, neon_intDI_type_node, NULL);
18430 void_ftype_pv16qi_v16qi_v16qi =
18431 build_function_type_list (void_type_node, V16QI_pointer_node,
18432 V16QI_type_node, V16QI_type_node, NULL);
18433 void_ftype_pv8hi_v8hi_v8hi =
18434 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18435 V8HI_type_node, NULL);
18436 void_ftype_pv4si_v4si_v4si =
18437 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18438 V4SI_type_node, NULL);
18439 void_ftype_pv4sf_v4sf_v4sf =
18440 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18441 V4SF_type_node, NULL);
18442 void_ftype_pv2di_v2di_v2di =
18443 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18444 V2DI_type_node, NULL);
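/* A RESULTPAIR operation such as vtrn returns its two vectors through the
   pointer operand instead of as a return value; e.g. a vtrn on v8qi values
   gets the void_ftype_pv8qi_v8qi_v8qi signature above, and its expansion
   is handled by neon_emit_pair_result_insn further down.  */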
18446 dreg_types[0] = V8QI_type_node;
18447 dreg_types[1] = V4HI_type_node;
18448 dreg_types[2] = V2SI_type_node;
18449 dreg_types[3] = V2SF_type_node;
18450 dreg_types[4] = neon_intDI_type_node;
18452 qreg_types[0] = V16QI_type_node;
18453 qreg_types[1] = V8HI_type_node;
18454 qreg_types[2] = V4SI_type_node;
18455 qreg_types[3] = V4SF_type_node;
18456 qreg_types[4] = V2DI_type_node;
18458 for (i = 0; i < 5; i++)
18459 {
18460 int j;
18461 for (j = 0; j < 5; j++)
18462 {
18463 reinterp_ftype_dreg[i][j]
18464 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18465 reinterp_ftype_qreg[i][j]
18466 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18467 }
18468 }
18470 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
18471 i < ARRAY_SIZE (neon_builtin_data);
18472 i++, fcode++)
18473 {
18474 neon_builtin_datum *d = &neon_builtin_data[i];
18476 const char* const modenames[] = {
18477 "v8qi", "v4hi", "v2si", "v2sf", "di",
18478 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18483 int is_load = 0, is_store = 0;
18485 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
18489 switch (d->itype)
18490 {
18491 case NEON_LOAD1:
18492 case NEON_LOAD1LANE:
18493 case NEON_LOADSTRUCT:
18494 case NEON_LOADSTRUCTLANE:
18495 is_load = 1;
18496 /* Fall through. */
18497 case NEON_STORE1:
18498 case NEON_STORE1LANE:
18499 case NEON_STORESTRUCT:
18500 case NEON_STORESTRUCTLANE:
18501 if (!is_load)
18502 is_store = 1;
18503 /* Fall through. */
18504 case NEON_UNOP:
18505 case NEON_BINOP:
18506 case NEON_LOGICBINOP:
18507 case NEON_SHIFTINSERT:
18508 case NEON_TERNOP:
18509 case NEON_GETLANE:
18510 case NEON_SETLANE:
18511 case NEON_CREATE:
18512 case NEON_DUP:
18513 case NEON_DUPLANE:
18514 case NEON_SHIFTIMM:
18515 case NEON_SHIFTACC:
18516 case NEON_COMBINE:
18517 case NEON_SPLIT:
18518 case NEON_CONVERT:
18519 case NEON_FIXCONV:
18520 case NEON_LANEMUL:
18521 case NEON_LANEMULL:
18522 case NEON_LANEMULH:
18523 case NEON_LANEMAC:
18524 case NEON_SCALARMUL:
18525 case NEON_SCALARMULL:
18526 case NEON_SCALARMULH:
18527 case NEON_SCALARMAC:
18528 case NEON_SELECT:
18529 case NEON_VTBL:
18530 case NEON_VTBX:
18531 {
18532 int k;
18533 tree return_type = void_type_node, args = void_list_node;
18535 /* Build a function type directly from the insn_data for
18536 this builtin. The build_function_type() function takes
18537 care of removing duplicates for us. */
18538 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
18539 {
18540 tree eltype;
18542 if (is_load && k == 1)
18543 {
18544 /* Neon load patterns always have the memory
18545 operand in the operand 1 position. */
18546 gcc_assert (insn_data[d->code].operand[k].predicate
18547 == neon_struct_operand);
18549 switch (d->mode)
18550 {
18551 case T_V8QI:
18552 case T_V16QI:
18553 eltype = const_intQI_pointer_node;
18554 break;
18556 case T_V4HI:
18557 case T_V8HI:
18558 eltype = const_intHI_pointer_node;
18559 break;
18561 case T_V2SI:
18562 case T_V4SI:
18563 eltype = const_intSI_pointer_node;
18564 break;
18566 case T_V2SF:
18567 case T_V4SF:
18568 eltype = const_float_pointer_node;
18569 break;
18571 case T_DI:
18572 case T_V2DI:
18573 eltype = const_intDI_pointer_node;
18574 break;
18576 default: gcc_unreachable ();
18577 }
18578 }
18579 else if (is_store && k == 0)
18581 /* Similarly, Neon store patterns use operand 0 as
18582 the memory location to store to. */
18583 gcc_assert (insn_data[d->code].operand[k].predicate
18584 == neon_struct_operand);
18586 switch (d->mode)
18587 {
18588 case T_V8QI:
18589 case T_V16QI:
18590 eltype = intQI_pointer_node;
18591 break;
18593 case T_V4HI:
18594 case T_V8HI:
18595 eltype = intHI_pointer_node;
18596 break;
18598 case T_V2SI:
18599 case T_V4SI:
18600 eltype = intSI_pointer_node;
18601 break;
18603 case T_V2SF:
18604 case T_V4SF:
18605 eltype = float_pointer_node;
18606 break;
18608 case T_DI:
18609 case T_V2DI:
18610 eltype = intDI_pointer_node;
18611 break;
18613 default: gcc_unreachable ();
18614 }
18615 }
18616 else
18617 {
18618 switch (insn_data[d->code].operand[k].mode)
18619 {
18620 case VOIDmode: eltype = void_type_node; break;
18621 /* Scalars. */
18622 case QImode: eltype = neon_intQI_type_node; break;
18623 case HImode: eltype = neon_intHI_type_node; break;
18624 case SImode: eltype = neon_intSI_type_node; break;
18625 case SFmode: eltype = neon_float_type_node; break;
18626 case DImode: eltype = neon_intDI_type_node; break;
18627 case TImode: eltype = intTI_type_node; break;
18628 case EImode: eltype = intEI_type_node; break;
18629 case OImode: eltype = intOI_type_node; break;
18630 case CImode: eltype = intCI_type_node; break;
18631 case XImode: eltype = intXI_type_node; break;
18632 /* 64-bit vectors. */
18633 case V8QImode: eltype = V8QI_type_node; break;
18634 case V4HImode: eltype = V4HI_type_node; break;
18635 case V2SImode: eltype = V2SI_type_node; break;
18636 case V2SFmode: eltype = V2SF_type_node; break;
18637 /* 128-bit vectors. */
18638 case V16QImode: eltype = V16QI_type_node; break;
18639 case V8HImode: eltype = V8HI_type_node; break;
18640 case V4SImode: eltype = V4SI_type_node; break;
18641 case V4SFmode: eltype = V4SF_type_node; break;
18642 case V2DImode: eltype = V2DI_type_node; break;
18643 default: gcc_unreachable ();
18644 }
18645 }
18647 if (k == 0 && !is_store)
18648 return_type = eltype;
18649 else
18650 args = tree_cons (NULL_TREE, eltype, args);
18651 }
18653 ftype = build_function_type (return_type, args);
18654 }
18655 break;
18657 case NEON_RESULTPAIR:
18658 {
18659 switch (insn_data[d->code].operand[1].mode)
18660 {
18661 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18662 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18663 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18664 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18665 case DImode: ftype = void_ftype_pdi_di_di; break;
18666 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18667 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18668 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18669 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18670 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18671 default: gcc_unreachable ();
18672 }
18673 }
18674 break;
18676 case NEON_REINTERP:
18677 {
18678 /* We iterate over 5 doubleword types, then 5 quadword
18679 types. */
18680 int rhs = d->mode % 5;
18681 switch (insn_data[d->code].operand[0].mode)
18682 {
18683 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18684 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18685 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18686 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18687 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18688 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18689 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18690 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18691 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18692 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18693 default: gcc_unreachable ();
18694 }
18695 }
18696 break;
18698 default:
18699 gcc_unreachable ();
18700 }
18702 gcc_assert (ftype != NULL);
18704 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
18706 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
18707 NULL_TREE);
18708 arm_builtin_decls[fcode] = decl;
18709 }
18710 }
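/* As a concrete illustration: the VAR3 (BINOP, vaddl, v8qi, v4hi, v2si)
   rows of the table register __builtin_neon_vaddlv8qi,
   __builtin_neon_vaddlv4hi and __builtin_neon_vaddlv2si here; arm_neon.h
   then maps intrinsics such as vaddl_s8 onto them.  */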
18712 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
18713 do \
18714 { \
18715 if ((MASK) & insn_flags) \
18716 { \
18717 tree bdecl; \
18718 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
18719 BUILT_IN_MD, NULL, NULL_TREE); \
18720 arm_builtin_decls[CODE] = bdecl; \
18721 } \
18722 } \
18723 while (0)
18725 struct builtin_description
18726 {
18727 const unsigned int mask;
18728 const enum insn_code icode;
18729 const char * const name;
18730 const enum arm_builtins code;
18731 const enum rtx_code comparison;
18732 const unsigned int flag;
18733 };
18735 static const struct builtin_description bdesc_2arg[] =
18736 {
18737 #define IWMMXT_BUILTIN(code, string, builtin) \
18738 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
18739 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
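/* For example, the first entry below expands (via the macro above) to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   i.e. one builtin_description gated on the iWMMXt feature flag.  */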
18741 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
18742 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
18743 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
18744 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
18745 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
18746 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
18747 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
18748 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
18749 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
18750 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
18751 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
18752 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
18753 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
18754 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
18755 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
18756 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
18757 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
18758 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
18759 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
18760 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
18761 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
18762 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
18763 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
18764 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
18765 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
18766 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
18767 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
18768 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
18769 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
18770 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
18771 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
18772 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
18773 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
18774 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
18775 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
18776 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
18777 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
18778 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
18779 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
18780 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
18781 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
18782 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
18783 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
18784 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
18785 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
18786 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
18787 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
18788 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
18789 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
18790 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
18791 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
18792 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
18793 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
18794 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
18795 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
18796 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
18797 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
18798 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
18800 #define IWMMXT_BUILTIN2(code, builtin) \
18801 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18803 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
18804 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
18805 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
18806 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
18807 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
18808 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
18809 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
18810 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
18811 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
18812 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
18813 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
18814 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
18815 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
18816 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
18817 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
18818 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
18819 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
18820 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
18821 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
18822 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
18823 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
18824 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
18825 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
18826 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
18827 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
18828 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
18829 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
18830 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
18831 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
18832 IWMMXT_BUILTIN2 (rordi3, WRORDI)
18833 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
18834 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
18835 };
18837 static const struct builtin_description bdesc_1arg[] =
18838 {
18839 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
18840 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
18841 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
18842 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
18843 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
18844 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
18845 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
18846 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
18847 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
18848 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
18849 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
18850 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
18851 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
18852 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
18853 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
18854 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
18855 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
18856 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
18857 };
18859 /* Set up all the iWMMXt builtins. This is not called if
18860 TARGET_IWMMXT is zero. */
18862 static void
18863 arm_init_iwmmxt_builtins (void)
18864 {
18865 const struct builtin_description * d;
18866 size_t i;
18868 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18869 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18870 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
18872 tree int_ftype_int
18873 = build_function_type_list (integer_type_node,
18874 integer_type_node, NULL_TREE);
18875 tree v8qi_ftype_v8qi_v8qi_int
18876 = build_function_type_list (V8QI_type_node,
18877 V8QI_type_node, V8QI_type_node,
18878 integer_type_node, NULL_TREE);
18879 tree v4hi_ftype_v4hi_int
18880 = build_function_type_list (V4HI_type_node,
18881 V4HI_type_node, integer_type_node, NULL_TREE);
18882 tree v2si_ftype_v2si_int
18883 = build_function_type_list (V2SI_type_node,
18884 V2SI_type_node, integer_type_node, NULL_TREE);
18885 tree v2si_ftype_di_di
18886 = build_function_type_list (V2SI_type_node,
18887 long_long_integer_type_node,
18888 long_long_integer_type_node,
18889 NULL_TREE);
18890 tree di_ftype_di_int
18891 = build_function_type_list (long_long_integer_type_node,
18892 long_long_integer_type_node,
18893 integer_type_node, NULL_TREE);
18894 tree di_ftype_di_int_int
18895 = build_function_type_list (long_long_integer_type_node,
18896 long_long_integer_type_node,
18897 integer_type_node,
18898 integer_type_node, NULL_TREE);
18899 tree int_ftype_v8qi
18900 = build_function_type_list (integer_type_node,
18901 V8QI_type_node, NULL_TREE);
18902 tree int_ftype_v4hi
18903 = build_function_type_list (integer_type_node,
18904 V4HI_type_node, NULL_TREE);
18905 tree int_ftype_v2si
18906 = build_function_type_list (integer_type_node,
18907 V2SI_type_node, NULL_TREE);
18908 tree int_ftype_v8qi_int
18909 = build_function_type_list (integer_type_node,
18910 V8QI_type_node, integer_type_node, NULL_TREE);
18911 tree int_ftype_v4hi_int
18912 = build_function_type_list (integer_type_node,
18913 V4HI_type_node, integer_type_node, NULL_TREE);
18914 tree int_ftype_v2si_int
18915 = build_function_type_list (integer_type_node,
18916 V2SI_type_node, integer_type_node, NULL_TREE);
18917 tree v8qi_ftype_v8qi_int_int
18918 = build_function_type_list (V8QI_type_node,
18919 V8QI_type_node, integer_type_node,
18920 integer_type_node, NULL_TREE);
18921 tree v4hi_ftype_v4hi_int_int
18922 = build_function_type_list (V4HI_type_node,
18923 V4HI_type_node, integer_type_node,
18924 integer_type_node, NULL_TREE);
18925 tree v2si_ftype_v2si_int_int
18926 = build_function_type_list (V2SI_type_node,
18927 V2SI_type_node, integer_type_node,
18928 integer_type_node, NULL_TREE);
18929 /* Miscellaneous. */
18930 tree v8qi_ftype_v4hi_v4hi
18931 = build_function_type_list (V8QI_type_node,
18932 V4HI_type_node, V4HI_type_node, NULL_TREE);
18933 tree v4hi_ftype_v2si_v2si
18934 = build_function_type_list (V4HI_type_node,
18935 V2SI_type_node, V2SI_type_node, NULL_TREE);
18936 tree v2si_ftype_v4hi_v4hi
18937 = build_function_type_list (V2SI_type_node,
18938 V4HI_type_node, V4HI_type_node, NULL_TREE);
18939 tree v2si_ftype_v8qi_v8qi
18940 = build_function_type_list (V2SI_type_node,
18941 V8QI_type_node, V8QI_type_node, NULL_TREE);
18942 tree v4hi_ftype_v4hi_di
18943 = build_function_type_list (V4HI_type_node,
18944 V4HI_type_node, long_long_integer_type_node,
18945 NULL_TREE);
18946 tree v2si_ftype_v2si_di
18947 = build_function_type_list (V2SI_type_node,
18948 V2SI_type_node, long_long_integer_type_node,
18949 NULL_TREE);
18950 tree void_ftype_int_int
18951 = build_function_type_list (void_type_node,
18952 integer_type_node, integer_type_node,
18953 NULL_TREE);
18954 tree di_ftype_void
18955 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
18956 tree di_ftype_v8qi
18957 = build_function_type_list (long_long_integer_type_node,
18958 V8QI_type_node, NULL_TREE);
18959 tree di_ftype_v4hi
18960 = build_function_type_list (long_long_integer_type_node,
18961 V4HI_type_node, NULL_TREE);
18962 tree di_ftype_v2si
18963 = build_function_type_list (long_long_integer_type_node,
18964 V2SI_type_node, NULL_TREE);
18965 tree v2si_ftype_v4hi
18966 = build_function_type_list (V2SI_type_node,
18967 V4HI_type_node, NULL_TREE);
18968 tree v4hi_ftype_v8qi
18969 = build_function_type_list (V4HI_type_node,
18970 V8QI_type_node, NULL_TREE);
18972 tree di_ftype_di_v4hi_v4hi
18973 = build_function_type_list (long_long_unsigned_type_node,
18974 long_long_unsigned_type_node,
18975 V4HI_type_node, V4HI_type_node,
18976 NULL_TREE);
18978 tree di_ftype_v4hi_v4hi
18979 = build_function_type_list (long_long_unsigned_type_node,
18980 V4HI_type_node, V4HI_type_node,
18981 NULL_TREE);
18983 /* Normal vector binops. */
18984 tree v8qi_ftype_v8qi_v8qi
18985 = build_function_type_list (V8QI_type_node,
18986 V8QI_type_node, V8QI_type_node, NULL_TREE);
18987 tree v4hi_ftype_v4hi_v4hi
18988 = build_function_type_list (V4HI_type_node,
18989 V4HI_type_node, V4HI_type_node, NULL_TREE);
18990 tree v2si_ftype_v2si_v2si
18991 = build_function_type_list (V2SI_type_node,
18992 V2SI_type_node, V2SI_type_node, NULL_TREE);
18993 tree di_ftype_di_di
18994 = build_function_type_list (long_long_unsigned_type_node,
18995 long_long_unsigned_type_node,
18996 long_long_unsigned_type_node,
18997 NULL_TREE);
18999 /* Add all builtins that are more or less simple operations on two
19000 operands. */
19001 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19002 {
19003 /* Use one of the operands; the target can have a different mode for
19004 mask-generating compares. */
19005 enum machine_mode mode;
19006 tree type;
19008 if (d->name == 0)
19009 continue;
19011 mode = insn_data[d->icode].operand[1].mode;
19013 switch (mode)
19014 {
19015 case V8QImode:
19016 type = v8qi_ftype_v8qi_v8qi;
19017 break;
19018 case V4HImode:
19019 type = v4hi_ftype_v4hi_v4hi;
19020 break;
19021 case V2SImode:
19022 type = v2si_ftype_v2si_v2si;
19023 break;
19024 case DImode:
19025 type = di_ftype_di_di;
19026 break;
19028 default:
19029 gcc_unreachable ();
19030 }
19032 def_mbuiltin (d->mask, d->name, type, d->code);
19033 }
19035 /* Add the remaining MMX insns with somewhat more complicated types. */
19036 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19038 ARM_BUILTIN_ ## CODE)
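/* E.g. the first use below, iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO),
   registers __builtin_arm_wzero with type di_ftype_void under code
   ARM_BUILTIN_WZERO, provided FL_IWMMXT is set in insn_flags.  */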
19040 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19041 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
19042 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
19044 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19045 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19046 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19047 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19048 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19049 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19051 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19052 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19053 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19054 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19055 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19056 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19058 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19059 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19060 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19061 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19062 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19063 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19065 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19066 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19067 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19068 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19069 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19070 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19072 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19074 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19075 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19076 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19077 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19079 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19080 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19081 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19082 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19083 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19084 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19085 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19086 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19087 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19089 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19090 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19091 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19093 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19094 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19095 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19097 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19098 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19099 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19100 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19101 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19102 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19104 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19105 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19106 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19107 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19108 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19109 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19110 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19111 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19112 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19113 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19114 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19115 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19117 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19118 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19119 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19120 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19122 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19123 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19124 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19125 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19126 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19127 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19128 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19130 #undef iwmmx_mbuiltin
19131 }
19133 static void
19134 arm_init_tls_builtins (void)
19135 {
19136 tree ftype, decl;
19138 ftype = build_function_type (ptr_type_node, void_list_node);
19139 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
19140 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
19141 "__builtin_thread_pointer", NULL_TREE);
19142 TREE_NOTHROW (decl) = 1;
19143 TREE_READONLY (decl) = 1;
19144 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
19145 }
19147 static void
19148 arm_init_fp16_builtins (void)
19149 {
19150 tree fp16_type = make_node (REAL_TYPE);
19151 TYPE_PRECISION (fp16_type) = 16;
19152 layout_type (fp16_type);
19153 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19154 }
19156 static void
19157 arm_init_builtins (void)
19158 {
19159 arm_init_tls_builtins ();
19161 if (TARGET_REALLY_IWMMXT)
19162 arm_init_iwmmxt_builtins ();
19164 if (TARGET_NEON)
19165 arm_init_neon_builtins ();
19167 if (arm_fp16_format)
19168 arm_init_fp16_builtins ();
19169 }
19171 /* Return the ARM builtin for CODE. */
19173 static tree
19174 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
19175 {
19176 if (code >= ARM_BUILTIN_MAX)
19177 return error_mark_node;
19179 return arm_builtin_decls[code];
19180 }
19182 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19184 static const char *
19185 arm_invalid_parameter_type (const_tree t)
19186 {
19187 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19188 return N_("function parameters cannot have __fp16 type");
19192 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19194 static const char *
19195 arm_invalid_return_type (const_tree t)
19197 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19198 return N_("functions cannot return __fp16 type");
19202 /* Implement TARGET_PROMOTED_TYPE. */
19205 arm_promoted_type (const_tree t)
19207 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19208 return float_type_node;
19209 return t;
19210 }
19212 /* Implement TARGET_CONVERT_TO_TYPE.
19213 Specifically, this hook implements the peculiarity of the ARM
19214 half-precision floating-point C semantics that requires conversions between
19215 __fp16 and double to go through an intermediate conversion to float. */
19217 static tree
19218 arm_convert_to_type (tree type, tree expr)
19219 {
19220 tree fromtype = TREE_TYPE (expr);
19221 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19222 return NULL_TREE;
19223 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19224 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19225 return convert (type, convert (float_type_node, expr));
19226 return NULL_TREE;
19227 }
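/* Illustration of the effect at the C level (not code from this file):
   given "__fp16 h; double d;", the conversion in "d = h" is expanded as
   (double) (float) h, and "h = (__fp16) d" as (__fp16) (float) d.  */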
19229 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19230 This simply adds HFmode as a supported mode; even though we don't
19231 implement arithmetic on this type directly, it's supported by
19232 optabs conversions, much the way the double-word arithmetic is
19233 special-cased in the default hook. */
19235 static bool
19236 arm_scalar_mode_supported_p (enum machine_mode mode)
19237 {
19238 if (mode == HFmode)
19239 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19240 else
19241 return default_scalar_mode_supported_p (mode);
19242 }
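/* In practice this means HFmode scalars are only accepted when a
   half-precision format has been selected (e.g. via -mfp16-format=ieee),
   in which case arm_fp16_format is nonzero and the __fp16 type is
   registered by arm_init_fp16_builtins above.  */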
19244 /* Errors in the source file can cause expand_expr to return const0_rtx
19245 where we expect a vector. To avoid crashing, use one of the vector
19246 clear instructions. */
19248 static rtx
19249 safe_vector_operand (rtx x, enum machine_mode mode)
19250 {
19251 if (x != const0_rtx)
19252 return x;
19253 x = gen_reg_rtx (mode);
19255 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19256 : gen_rtx_SUBREG (DImode, x, 0)));
19257 return x;
19258 }
19260 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19262 static rtx
19263 arm_expand_binop_builtin (enum insn_code icode,
19264 tree exp, rtx target)
19265 {
19266 rtx pat;
19267 tree arg0 = CALL_EXPR_ARG (exp, 0);
19268 tree arg1 = CALL_EXPR_ARG (exp, 1);
19269 rtx op0 = expand_normal (arg0);
19270 rtx op1 = expand_normal (arg1);
19271 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19272 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19273 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19275 if (VECTOR_MODE_P (mode0))
19276 op0 = safe_vector_operand (op0, mode0);
19277 if (VECTOR_MODE_P (mode1))
19278 op1 = safe_vector_operand (op1, mode1);
19280 if (! target
19281 || GET_MODE (target) != tmode
19282 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19283 target = gen_reg_rtx (tmode);
19285 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19287 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19288 op0 = copy_to_mode_reg (mode0, op0);
19289 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19290 op1 = copy_to_mode_reg (mode1, op1);
19292 pat = GEN_FCN (icode) (target, op0, op1);
19293 if (! pat)
19294 return 0;
19295 emit_insn (pat);
19296 return target;
19297 }
19299 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19301 static rtx
19302 arm_expand_unop_builtin (enum insn_code icode,
19303 tree exp, rtx target, int do_load)
19304 {
19305 rtx pat;
19306 tree arg0 = CALL_EXPR_ARG (exp, 0);
19307 rtx op0 = expand_normal (arg0);
19308 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19309 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19311 if (! target
19312 || GET_MODE (target) != tmode
19313 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19314 target = gen_reg_rtx (tmode);
19315 if (do_load)
19316 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19317 else
19318 {
19319 if (VECTOR_MODE_P (mode0))
19320 op0 = safe_vector_operand (op0, mode0);
19322 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19323 op0 = copy_to_mode_reg (mode0, op0);
19324 }
19326 pat = GEN_FCN (icode) (target, op0);
19327 if (! pat)
19328 return 0;
19329 emit_insn (pat);
19330 return target;
19331 }
19333 typedef enum {
19334 NEON_ARG_COPY_TO_REG,
19335 NEON_ARG_CONSTANT,
19336 NEON_ARG_MEMORY,
19337 NEON_ARG_STOP
19338 } builtin_arg;
19340 #define NEON_MAX_BUILTIN_ARGS 5
19342 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19343 and return an expression for the accessed memory.
19345 The intrinsic function operates on a block of registers that has
19346 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19347 The function references the memory at EXP in mode MEM_MODE;
19348 this mode may be BLKmode if no more suitable mode is available. */
19350 static tree
19351 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
19352 enum machine_mode reg_mode,
19353 neon_builtin_type_mode type_mode)
19354 {
19355 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
19356 tree elem_type, upper_bound, array_type;
19358 /* Work out the size of the register block in bytes. */
19359 reg_size = GET_MODE_SIZE (reg_mode);
19361 /* Work out the size of each vector in bytes. */
19362 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
19363 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
19365 /* Work out how many vectors there are. */
19366 gcc_assert (reg_size % vector_size == 0);
19367 nvectors = reg_size / vector_size;
19369 /* Work out how many elements are being loaded or stored.
19370 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19371 and memory elements; anything else implies a lane load or store. */
19372 if (mem_mode == reg_mode)
19373 nelems = vector_size * nvectors;
19374 else
19375 nelems = nvectors;
19377 /* Work out the type of each element. */
19378 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
19379 elem_type = TREE_TYPE (TREE_TYPE (exp));
19381 /* Create a type that describes the full access. */
19382 upper_bound = build_int_cst (size_type_node, nelems - 1);
19383 array_type = build_array_type (elem_type, build_index_type (upper_bound));
19385 /* Dereference EXP using that type. */
19386 exp = convert (build_pointer_type (array_type), exp);
19387 return fold_build2 (MEM_REF, array_type, exp,
19388 build_int_cst (TREE_TYPE (exp), 0));
19389 }
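/* Worked example (illustrative): for a full vld1 of a v8qi value, REG_MODE
   and MEM_MODE are both V8QImode, so nvectors == 1 and the access is
   described as an array of 8 elements; for a lane access such as
   vld2_lane, MEM_MODE differs from REG_MODE and only NVECTORS elements
   (one lane per vector in the block) are dereferenced.  */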
19391 /* Expand a Neon builtin. */
19392 static rtx
19393 arm_expand_neon_args (rtx target, int icode, int have_retval,
19394 neon_builtin_type_mode type_mode,
19395 tree exp, ...)
19396 {
19397 va_list ap;
19398 rtx pat;
19399 tree arg[NEON_MAX_BUILTIN_ARGS];
19400 rtx op[NEON_MAX_BUILTIN_ARGS];
19401 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19402 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19403 enum machine_mode other_mode;
19404 int argc = 0;
19405 int opno;
19407 if (have_retval
19408 && (!target
19409 || GET_MODE (target) != tmode
19410 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19411 target = gen_reg_rtx (tmode);
19413 va_start (ap, exp);
19415 for (;;)
19416 {
19417 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19419 if (thisarg == NEON_ARG_STOP)
19420 break;
19421 else
19422 {
19423 opno = argc + have_retval;
19424 mode[argc] = insn_data[icode].operand[opno].mode;
19425 arg[argc] = CALL_EXPR_ARG (exp, argc);
19426 if (thisarg == NEON_ARG_MEMORY)
19427 {
19428 other_mode = insn_data[icode].operand[1 - opno].mode;
19429 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
19430 other_mode, type_mode);
19431 }
19432 op[argc] = expand_normal (arg[argc]);
19434 switch (thisarg)
19435 {
19436 case NEON_ARG_COPY_TO_REG:
19437 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19438 if (!(*insn_data[icode].operand[opno].predicate)
19439 (op[argc], mode[argc]))
19440 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19441 break;
19443 case NEON_ARG_CONSTANT:
19444 /* FIXME: This error message is somewhat unhelpful. */
19445 if (!(*insn_data[icode].operand[opno].predicate)
19446 (op[argc], mode[argc]))
19447 error ("argument must be a constant");
19450 case NEON_ARG_MEMORY:
19451 gcc_assert (MEM_P (op[argc]));
19452 PUT_MODE (op[argc], mode[argc]);
19453 /* ??? arm_neon.h uses the same built-in functions for signed
19454 and unsigned accesses, casting where necessary. This isn't
19455 alias safe. */
19456 set_mem_alias_set (op[argc], 0);
19457 if (!(*insn_data[icode].operand[opno].predicate)
19458 (op[argc], mode[argc]))
19459 op[argc] = (replace_equiv_address
19460 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
19461 break;
19463 case NEON_ARG_STOP:
19464 gcc_unreachable ();
19465 }
19467 argc++;
19468 }
19469 }
19471 va_end (ap);
19473 if (have_retval)
19474 switch (argc)
19475 {
19476 case 1:
19477 pat = GEN_FCN (icode) (target, op[0]);
19478 break;
19480 case 2:
19481 pat = GEN_FCN (icode) (target, op[0], op[1]);
19482 break;
19484 case 3:
19485 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19486 break;
19488 case 4:
19489 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19490 break;
19492 case 5:
19493 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19494 break;
19496 default:
19497 gcc_unreachable ();
19498 }
19499 else
19500 switch (argc)
19501 {
19502 case 1:
19503 pat = GEN_FCN (icode) (op[0]);
19504 break;
19506 case 2:
19507 pat = GEN_FCN (icode) (op[0], op[1]);
19508 break;
19510 case 3:
19511 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19512 break;
19514 case 4:
19515 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19516 break;
19518 case 5:
19519 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19520 break;
19522 default:
19523 gcc_unreachable ();
19524 }
19526 if (!pat)
19527 return 0;
19529 emit_insn (pat);
19531 return target;
19532 }
19534 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19535 constants defined per-instruction or per instruction-variant. Instead, the
19536 required info is looked up in the table neon_builtin_data. */
19537 static rtx
19538 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19539 {
19540 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
19541 neon_itype itype = d->itype;
19542 enum insn_code icode = d->code;
19543 neon_builtin_type_mode type_mode = d->mode;
19545 switch (itype)
19546 {
19547 case NEON_UNOP:
19548 case NEON_CONVERT:
19549 case NEON_DUPLANE:
19550 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19551 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19553 case NEON_BINOP:
19554 case NEON_SETLANE:
19555 case NEON_SCALARMUL:
19556 case NEON_SCALARMULL:
19557 case NEON_SCALARMULH:
19558 case NEON_SHIFTINSERT:
19559 case NEON_LOGICBINOP:
19560 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19561 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19562 NEON_ARG_STOP);
19564 case NEON_TERNOP:
19565 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19566 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19567 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19569 case NEON_GETLANE:
19570 case NEON_FIXCONV:
19571 case NEON_SHIFTIMM:
19572 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19573 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19574 NEON_ARG_STOP);
19576 case NEON_CREATE:
19577 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19578 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19580 case NEON_DUP:
19581 case NEON_SPLIT:
19582 case NEON_REINTERP:
19583 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19584 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19586 case NEON_COMBINE:
19587 case NEON_VTBL:
19588 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19589 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19591 case NEON_RESULTPAIR:
19592 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19593 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19594 NEON_ARG_STOP);
19596 case NEON_LANEMUL:
19597 case NEON_LANEMULL:
19598 case NEON_LANEMULH:
19599 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19600 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19601 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19603 case NEON_LANEMAC:
19604 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19605 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19606 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19608 case NEON_SHIFTACC:
19609 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19610 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19611 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19613 case NEON_SCALARMAC:
19614 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19615 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19616 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19618 case NEON_SELECT:
19619 case NEON_VTBX:
19620 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19621 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19622 NEON_ARG_STOP);
19624 case NEON_LOAD1:
19625 case NEON_LOADSTRUCT:
19626 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19627 NEON_ARG_MEMORY, NEON_ARG_STOP);
19629 case NEON_LOAD1LANE:
19630 case NEON_LOADSTRUCTLANE:
19631 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19632 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19633 NEON_ARG_STOP);
19635 case NEON_STORE1:
19636 case NEON_STORESTRUCT:
19637 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19638 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19640 case NEON_STORE1LANE:
19641 case NEON_STORESTRUCTLANE:
19642 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19643 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19644 NEON_ARG_STOP);
19646 default:
19647 gcc_unreachable ();
19648 }
19649 }
19650 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19651 void
19652 neon_reinterpret (rtx dest, rtx src)
19653 {
19654 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19655 }
19657 /* Emit code to place a Neon pair result in memory locations (with equal
19660 neon_emit_pair_result_insn (enum machine_mode mode,
19661 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19664 rtx mem = gen_rtx_MEM (mode, destaddr);
19665 rtx tmp1 = gen_reg_rtx (mode);
19666 rtx tmp2 = gen_reg_rtx (mode);
19668 emit_insn (intfn (tmp1, op1, op2, tmp2));
19670 emit_move_insn (mem, tmp1);
19671 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19672 emit_move_insn (mem, tmp2);
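/* Illustrative sketch (not part of GCC): the pair result is laid out as two
   consecutive mode-sized words at DESTADDR. In plain C terms, with
   hypothetical names, the generated code behaves like: */
#if 0
void
store_pair (long long *destaddr, long long lo, long long hi)
{
  destaddr[0] = lo;   /* first store at offset 0 */
  destaddr[1] = hi;   /* second store at offset GET_MODE_SIZE (mode) */
}
#endif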
/* Set up operands for a register copy from src to dest, taking care not to
   clobber registers in the process.
   FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
   be called with a large N, so that should be OK. */
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1 << count) - 1;

  while (copied != done)
      for (i = 0; i < count; i++)
          for (j = 0; good && j < count; j++)
            if (i != j && (copied & (1 << j)) == 0
                && reg_overlap_mentioned_p (src[j], dest[i]))
              operands[opctr++] = dest[i];
              operands[opctr++] = src[i];

  gcc_assert (opctr == count * 2);
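/* Illustrative sketch (not part of GCC): the loop above repeatedly picks a
   copy whose destination is not still needed as the source of some other
   pending copy, so no value is clobbered before it has been read. A
   self-contained model on plain integer "registers": */
#if 0
/* Order COUNT moves dst[i] <- src[i] so no destination overwrites a
   still-pending source.  Assumes no cyclic dependencies.  */
static void
schedule_copies (const int *dst, const int *src, unsigned count, int *order)
{
  unsigned copied = 0, n = 0;
  while (copied != (1u << count) - 1)
    {
      unsigned i, j;
      for (i = 0; i < count; i++)
        {
          int good = 1;
          if (copied & (1u << i))
            continue;
          for (j = 0; good && j < count; j++)
            if (i != j && !(copied & (1u << j)) && src[j] == dst[i])
              good = 0;   /* dst[i] still feeds a pending copy */
          if (good)
            {
              order[n++] = i;
              copied |= 1u << i;
            }
        }
    }
}
#endif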
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored. */
arm_expand_builtin (tree exp,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
  const struct builtin_description * d;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  int fcode = DECL_FUNCTION_CODE (fndecl);
  enum machine_mode tmode;
  enum machine_mode mode0;
  enum machine_mode mode1;
  enum machine_mode mode2;

  if (fcode >= ARM_BUILTIN_NEON_BASE)
    return arm_expand_neon_builtin (fcode, exp, target);
    case ARM_BUILTIN_TEXTRMSB:
    case ARM_BUILTIN_TEXTRMUB:
    case ARM_BUILTIN_TEXTRMSH:
    case ARM_BUILTIN_TEXTRMUH:
    case ARM_BUILTIN_TEXTRMSW:
    case ARM_BUILTIN_TEXTRMUW:
      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
               : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
               : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
               : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
               : CODE_FOR_iwmmxt_textrmw);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
          /* @@@ better error message */
          error ("selector must be an immediate");
          return gen_reg_rtx (tmode);
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
    case ARM_BUILTIN_TINSRB:
    case ARM_BUILTIN_TINSRH:
    case ARM_BUILTIN_TINSRW:
      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
               : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
               : CODE_FOR_iwmmxt_tinsrw);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
          /* @@@ better error message */
          error ("selector must be an immediate");
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
    case ARM_BUILTIN_SETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = force_reg (SImode, expand_normal (arg0));
      op1 = expand_normal (arg1);
      emit_insn (gen_iwmmxt_tmcr (op1, op0));

    case ARM_BUILTIN_GETWCX:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      target = gen_reg_rtx (SImode);
      emit_insn (gen_iwmmxt_tmrc (target, op0));

    case ARM_BUILTIN_WSHUFH:
      icode = CODE_FOR_iwmmxt_wshufh;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
          /* @@@ better error message */
          error ("mask must be an immediate");
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);

    case ARM_BUILTIN_WSADB:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
    case ARM_BUILTIN_WSADH:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
    case ARM_BUILTIN_WSADBZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
    case ARM_BUILTIN_WSADHZ:
      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
    /* Several three-argument builtins. */
    case ARM_BUILTIN_WMACS:
    case ARM_BUILTIN_WMACU:
    case ARM_BUILTIN_WALIGN:
    case ARM_BUILTIN_TMIA:
    case ARM_BUILTIN_TMIAPH:
    case ARM_BUILTIN_TMIATT:
    case ARM_BUILTIN_TMIATB:
    case ARM_BUILTIN_TMIABT:
    case ARM_BUILTIN_TMIABB:
      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
               : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
               : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
               : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
               : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
               : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
               : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
               : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
               : CODE_FOR_iwmmxt_walign);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);

    case ARM_BUILTIN_WZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_iwmmxt_clrdi (target));

    case ARM_BUILTIN_THREAD_POINTER:
      return arm_load_tp (target);
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_binop_builtin (d->icode, exp, target);

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == (const enum arm_builtins) fcode)
      return arm_expand_unop_builtin (d->icode, exp, target, 0);

  /* @@@ Should really do something sensible here. */

/* Return the number (counting from 0) of
   the least significant set bit in MASK. */
number_of_first_bit_set (unsigned mask)
  for (bit = 0;
       (mask & (1 << bit)) == 0;
       ++bit)
    continue;
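/* Illustrative sketch (not part of GCC): the loop scans upward until the
   first set bit; on hosts with a count-trailing-zeros primitive the same
   result comes from __builtin_ctz.  A self-contained equivalent: */
#if 0
static int
first_bit_set (unsigned mask)
{
  int bit = 0;
  /* MASK must be nonzero, as the callers here guarantee.  */
  while ((mask & (1u << bit)) == 0)
    bit++;
  return bit;        /* e.g. 0x30 -> 4, same as __builtin_ctz (0x30) */
}
#endif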
/* Emit code to push or pop registers to or from the stack. F is the
   assembly file. MASK is the registers to push or pop. PUSH is
   nonzero if we should push, and zero if we should pop. For debugging
   output, if pushing, adjust CFA_OFFSET by the amount of space added
   to the stack. REAL_REGS should have the same number of bits set as
   MASK, and will be used instead (in the same order) to describe which
   registers were saved - this is used to mark the save slots when we
   push high registers after moving them to low registers. */
thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
               unsigned long real_regs)
  int lo_mask = mask & 0xFF;
  int pushed_words = 0;

  if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
      /* Special case. Do not generate a POP PC statement here, do it in
         thumb_exit instead. */
      thumb_exit (f, -1);

  if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
      fprintf (f, "\t.save\t{");
      for (regno = 0; regno < 15; regno++)
          if (real_regs & (1 << regno))
              if (real_regs & ((1 << regno) - 1))
              asm_fprintf (f, "%r", regno);
      fprintf (f, "}\n");
  fprintf (f, "\t%s\t{", push ? "push" : "pop");

  /* Look at the low registers first. */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
      asm_fprintf (f, "%r", regno);
      if ((lo_mask & ~1) != 0)

  if (push && (mask & (1 << LR_REGNUM)))
      /* Catch pushing the LR. */
      asm_fprintf (f, "%r", LR_REGNUM);
  else if (!push && (mask & (1 << PC_REGNUM)))
      /* Catch popping the PC. */
      if (TARGET_INTERWORK || TARGET_BACKTRACE
          || crtl->calls_eh_return)
          /* The PC is never popped directly; instead
             it is popped into r3 and then BX is used. */
          fprintf (f, "}\n");
          thumb_exit (f, -1);
          asm_fprintf (f, "%r", PC_REGNUM);

  fprintf (f, "}\n");

  if (push && pushed_words && dwarf2out_do_frame ())
      char *l = dwarf2out_cfi_label (false);
      int pushed_mask = real_regs;

      *cfa_offset += pushed_words * 4;
      dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);

      pushed_mask = real_regs;
      for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
          if (pushed_mask & 1)
            dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
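/* Illustrative sketch (not part of GCC): for MASK = {r4, r5, lr} on a push,
   this routine emits something like the following (unwind directive first
   when EABI unwinding is enabled):

       .save   {r4, r5, lr}
       push    {r4, r5, lr}

   and, in the simple non-interworking case, the matching pop of the same
   frame restores the PC in place of LR:

       pop     {r4, r5, pc}
*/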
/* Generate code to return from a thumb function.
   If 'reg_containing_return_addr' is -1, then the return address is
   actually on the stack, at the stack pointer. */
thumb_exit (FILE *f, int reg_containing_return_addr)
  unsigned regs_available_for_popping;
  unsigned regs_to_pop;
  unsigned available;
  int restore_a4 = FALSE;

  /* Compute the registers we need to pop. */
  if (reg_containing_return_addr == -1)
      regs_to_pop |= 1 << LR_REGNUM;

  if (TARGET_BACKTRACE)
      /* Restore the (ARM) frame pointer and stack pointer. */
      regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);

  /* If there is nothing to pop then just emit the BX instruction and
     return. */
  if (pops_needed == 0)
      if (crtl->calls_eh_return)
        asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);

  /* Otherwise if we are not supporting interworking and we have not created
     a backtrace structure and the function was not entered in ARM mode then
     just pop the return address straight into the PC. */
  else if (!TARGET_INTERWORK
           && !TARGET_BACKTRACE
           && !is_called_in_ARM_mode (current_function_decl)
           && !crtl->calls_eh_return)
      asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
  /* Find out how many of the (return) argument registers we can corrupt. */
  regs_available_for_popping = 0;

  /* If returning via __builtin_eh_return, the bottom three registers
     all contain information needed for the return. */
  if (crtl->calls_eh_return)

  /* Otherwise, deduce the registers used from the function's
     return value. This is more reliable than examining
     df_regs_ever_live_p () because that will be set if the register is
     ever used in the function, not just if the register is used
     to hold a return value. */
  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  size = GET_MODE_SIZE (mode);

  /* In a void function we can use any argument register.
     In a function that returns a structure on the stack
     we can use the second and third argument registers. */
  if (mode == VOIDmode)
    regs_available_for_popping =
      (1 << ARG_REGISTER (1))
      | (1 << ARG_REGISTER (2))
      | (1 << ARG_REGISTER (3));
    regs_available_for_popping =
      (1 << ARG_REGISTER (2))
      | (1 << ARG_REGISTER (3));
  else if (size <= 4)
    regs_available_for_popping =
      (1 << ARG_REGISTER (2))
      | (1 << ARG_REGISTER (3));
  else if (size <= 8)
    regs_available_for_popping =
      (1 << ARG_REGISTER (3));
  /* Match registers to be popped with registers into which we pop them. */
  for (available = regs_available_for_popping,
       required = regs_to_pop;
       required != 0 && available != 0;
       available &= ~(available & - available),
       required &= ~(required & - required))

  /* If we have any popping registers left over, remove them. */
    regs_available_for_popping &= ~available;

  /* Otherwise if we need another popping register we can use
     the fourth argument register. */
  else if (pops_needed)
      /* If we have not found any free argument registers and
         reg a4 contains the return address, we must move it. */
      if (regs_available_for_popping == 0
          && reg_containing_return_addr == LAST_ARG_REGNUM)
          asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
          reg_containing_return_addr = LR_REGNUM;
      else if (size > 12)
          /* Register a4 is being used to hold part of the return value,
             but we have dire need of a free, low register. */
          asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);

      if (reg_containing_return_addr != LAST_ARG_REGNUM)
          /* The fourth argument register is available. */
          regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
  /* Pop as many registers as we can. */
  thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                 regs_available_for_popping);

  /* Process the registers we popped. */
  if (reg_containing_return_addr == -1)
      /* The return address was popped into the lowest numbered register. */
      regs_to_pop &= ~(1 << LR_REGNUM);

      reg_containing_return_addr =
        number_of_first_bit_set (regs_available_for_popping);

      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops. */
      regs_available_for_popping &= ~(1 << reg_containing_return_addr);

  /* If we popped other registers then handle them here. */
  if (regs_available_for_popping)
      /* Work out which register currently contains the frame pointer. */
      frame_pointer = number_of_first_bit_set (regs_available_for_popping);

      /* Move it into the correct place. */
      asm_fprintf (f, "\tmov\t%r, %r\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);

      /* (Temporarily) remove it from the mask of popped registers. */
      regs_available_for_popping &= ~(1 << frame_pointer);
      regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);

      if (regs_available_for_popping)
          /* We popped the stack pointer as well,
             find the register that contains it. */
          stack_pointer = number_of_first_bit_set (regs_available_for_popping);

          /* Move it into the stack register. */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);

          /* At this point we have popped all necessary registers, so
             do not worry about restoring regs_available_for_popping
             to its correct value:

             assert (pops_needed == 0)
             assert (regs_available_for_popping == (1 << frame_pointer))
             assert (regs_to_pop == (1 << STACK_POINTER)) */

          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop. */
          regs_available_for_popping |= (1 << frame_pointer);
  /* If we still have registers left on the stack, but we no longer have
     any registers into which we can pop them, then we must move the return
     address into the link register and make available the register that
     contained it. */
  if (regs_available_for_popping == 0 && pops_needed > 0)
      regs_available_for_popping |= 1 << reg_containing_return_addr;

      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
                   reg_containing_return_addr);

      reg_containing_return_addr = LR_REGNUM;

  /* If we have registers left on the stack then pop some more.
     We know that at most we will want to pop FP and SP. */
  if (pops_needed > 0)
      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                     regs_available_for_popping);

      /* We have popped either FP or SP.
         Move whichever one it is into the correct register. */
      popped_into = number_of_first_bit_set (regs_available_for_popping);
      move_to = number_of_first_bit_set (regs_to_pop);

      asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);

      regs_to_pop &= ~(1 << move_to);

  /* If we still have not popped everything then we must have only
     had one register available to us and we are now popping the SP. */
  if (pops_needed > 0)
      thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
                     regs_available_for_popping);

      popped_into = number_of_first_bit_set (regs_available_for_popping);

      asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);

      /* assert (regs_to_pop == (1 << STACK_POINTER))
         assert (pops_needed == 1) */

  /* If necessary restore the a4 register. */
  if (reg_containing_return_addr != LR_REGNUM)
      asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
      reg_containing_return_addr = LR_REGNUM;

  asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);

  /* Return to caller. */
  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
/* Scan INSN just before assembler is output for it.
   For Thumb-1, we track the status of the condition codes; this
   information is used in the cbranchsi4_insn pattern. */
thumb1_final_prescan_insn (rtx insn)
  if (flag_print_asm_name)
    asm_fprintf (asm_out_file, "%@ 0x%04x\n",
                 INSN_ADDRESSES (INSN_UID (insn)));
  /* Don't overwrite the previous setter when we get to a cbranch. */
  if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
      enum attr_conds conds;

      if (cfun->machine->thumb1_cc_insn)
          if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
              || modified_in_p (cfun->machine->thumb1_cc_op1, insn))

      conds = get_attr_conds (insn);
      if (conds == CONDS_SET)
          rtx set = single_set (insn);
          cfun->machine->thumb1_cc_insn = insn;
          cfun->machine->thumb1_cc_op0 = SET_DEST (set);
          cfun->machine->thumb1_cc_op1 = const0_rtx;
          cfun->machine->thumb1_cc_mode = CC_NOOVmode;
          if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
              rtx src1 = XEXP (SET_SRC (set), 1);
              if (src1 == const0_rtx)
                cfun->machine->thumb1_cc_mode = CCmode;
      else if (conds != CONDS_NOCOND)
        cfun->machine->thumb1_cc_insn = NULL_RTX;

thumb_shiftable_const (unsigned HOST_WIDE_INT val)
  unsigned HOST_WIDE_INT mask = 0xff;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
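/* Illustrative sketch (not part of GCC): a constant is "shiftable" here when
   all of its set bits fit inside one 8-bit window, i.e. it equals some byte
   value shifted left. Thumb-1 can then build it with a MOV of the byte
   followed by an LSL. A self-contained version of the check: */
#if 0
static int
byte_shifted_p (unsigned long val)
{
  int i;
  for (i = 0; i < 25; i++)
    if ((val & (0xffUL << i)) == val)
      return 1;   /* e.g. 0x00ff0000 == 0xff << 16 -> yes */
  return 0;       /* e.g. 0x00ff00ff spans two windows -> no */
}
#endif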
/* Returns nonzero if the current function contains,
   or might contain, a far jump. */
thumb_far_jump_used_p (void)
  /* This test is only important for leaf functions. */
  /* assert (!leaf_function_p ()); */

  /* If we have already decided that far jumps may be used,
     do not bother checking again, and always return true even if
     it turns out that they are not being used. Once we have made
     the decision that far jumps are present (and that hence the link
     register will be pushed onto the stack) we cannot go back on it. */
  if (cfun->machine->far_jump_used)

  /* If this function is not being called from the prologue/epilogue
     generation code then it must be being called from the
     INITIAL_ELIMINATION_OFFSET macro. */
  if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
      /* In this case we know that we are being asked about the elimination
         of the arg pointer register. If that register is not being used,
         then there are no arguments on the stack, and we do not have to
         worry that a far jump might force the prologue to push the link
         register, changing the stack offsets. In this case we can just
         return false, since the presence of far jumps in the function will
         not affect stack offsets.

         If the arg pointer is live (or if it was live, but has now been
         eliminated and so set to dead) then we do have to test to see if
         the function might contain a far jump. This test can lead to some
         false negatives, since before reload is completed, the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.

         A false negative will not result in bad code being generated, but it
         will result in a needless push and pop of the link register. We
         hope that this does not occur too often.

         If we need doubleword stack alignment this could affect the other
         elimination offsets so we can't risk getting it wrong. */
      if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
        cfun->machine->arg_pointer_live = 1;
      else if (!cfun->machine->arg_pointer_live)

  /* Check to see if the function contains a branch
     insn with the far jump attribute set. */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
      if (GET_CODE (insn) == JUMP_INSN
          /* Ignore tablejump patterns. */
          && GET_CODE (PATTERN (insn)) != ADDR_VEC
          && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
          && get_attr_far_jump (insn) == FAR_JUMP_YES
          /* Record the fact that we have decided that
             the function does use far jumps. */
          cfun->machine->far_jump_used = 1;
/* Return nonzero if FUNC must be entered in ARM mode. */
is_called_in_ARM_mode (tree func)
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken. */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))

  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;

/* Given the stack offsets and register mask in OFFSETS, decide how
   many additional registers to push instead of subtracting a constant
   from SP. For epilogues the principle is the same except we use pop.
   FOR_PROLOGUE indicates which we're generating. */
thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
  HOST_WIDE_INT amount;
  unsigned long live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push/pop
     instructions. */
  unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
  /* Then count how many other high registers will need to be pushed. */
  unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  int n_free, reg_base;

  if (!for_prologue && frame_pointer_needed)
    amount = offsets->locals_base - offsets->saved_regs;
    amount = offsets->outgoing_args - offsets->saved_regs;

  /* If the stack frame size is 512 exactly, we can save one load
     instruction, which should make this a win even when optimizing
     for speed. */
  if (!optimize_size && amount != 512)

  /* Can't do this if there are high registers to push. */
  if (high_regs_pushed != 0)

  /* Shouldn't do it in the prologue if no registers would normally
     be pushed at all. In the epilogue, also allow it if we'll have
     a pop insn for the PC. */
      || TARGET_BACKTRACE
      || (live_regs_mask & 1 << LR_REGNUM) == 0
      || TARGET_INTERWORK
      || crtl->args.pretend_args_size != 0))

  /* Don't do this if thumb_expand_prologue wants to emit instructions
     between the push and the stack frame allocation. */
      && ((flag_pic && arm_pic_register != INVALID_REGNUM)
          || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))

  reg_base = arm_size_return_regs () / UNITS_PER_WORD;
  live_regs_mask >>= reg_base;

  while (reg_base + n_free < 8 && !(live_regs_mask & 1)
         && (for_prologue || call_used_regs[reg_base + n_free]))
      live_regs_mask >>= 1;

  gcc_assert (amount / 4 * 4 == amount);

  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;
  if (amount <= n_free * 4)
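/* Illustrative sketch (not part of GCC): Thumb-1 SUB SP only encodes
   immediates up to 508 (127 words), so a frame of, say, 520 bytes with
   three free low registers can instead push 3 extra registers:
   520 - 508 = 12 bytes = 3 words, leaving a 508-byte adjustment that fits
   a single insn. That is exactly the (amount - 508) / 4 computation above
   (the final return is assumed from context): */
#if 0
static int
extra_pushes_for (long amount, int n_free)
{
  if (amount >= 512 && (amount - n_free * 4) < 512)
    return (amount - 508) / 4;   /* e.g. amount == 520 -> push 3 regs */
  if (amount <= n_free * 4)
    return amount / 4;           /* whole adjustment done by pushes */
  return 0;
}
#endif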
/* The bits which aren't usefully expanded as rtl. */
thumb_unexpanded_epilogue (void)
  arm_stack_offsets *offsets;
  unsigned long live_regs_mask = 0;
  int high_regs_pushed = 0;
  int had_to_push_lr;

  if (cfun->machine->return_used_this_function != 0)

  if (IS_NAKED (arm_current_func_type ()))

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);

  /* We can deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value. */
  size = arm_size_return_regs ();

  extra_pop = thumb1_extra_regs_pushed (offsets, false);
      unsigned long extra_mask = (1 << extra_pop) - 1;
      live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);

  /* The prolog may have pushed some high registers to use as
     work registers. e.g. the testsuite file:
     gcc/testsuite/gcc.c-torture/execute/complex-2.c
     compiles to produce:
       push {r4, r5, r6, r7, lr}
     as part of the prolog. We have to undo that pushing here. */

  if (high_regs_pushed)
      unsigned long mask = live_regs_mask & 0xff;

      /* The available low registers depend on the size of the value we are
         returning. */
          /* Oh dear! We have no low registers into which we can pop
             high registers! */
          internal_error
            ("no low registers available for popping high registers");

      for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
        if (live_regs_mask & (1 << next_hi_reg))

      while (high_regs_pushed)
          /* Find lo register(s) into which the high register(s) can
             be popped. */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
              if (mask & (1 << regno))
                high_regs_pushed--;
              if (high_regs_pushed == 0)

          mask &= (2 << regno) - 1; /* A noop if regno == 8 */

          /* Pop the values into the low register(s). */
          thumb_pushpop (asm_out_file, mask, 0, NULL, mask);

          /* Move the value(s) into the high registers. */
          for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
              if (mask & (1 << regno))
                  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,

                  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
                    if (live_regs_mask & (1 << next_hi_reg))

      live_regs_mask &= ~0x0f00;
  had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
  live_regs_mask &= 0xff;

  if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
      /* Pop the return address into the PC. */
      if (had_to_push_lr)
        live_regs_mask |= 1 << PC_REGNUM;

      /* Either no argument registers were pushed or a backtrace
         structure was created which includes an adjusted stack
         pointer, so just pop everything. */
      if (live_regs_mask)
        thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,

      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pushpop has already called thumb_exit if the
         PC was in the list. */
      if (!had_to_push_lr)
        thumb_exit (asm_out_file, LR_REGNUM);
      /* Pop everything but the return address. */
      if (live_regs_mask)
        thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,

      if (had_to_push_lr)
          /* We have no free low regs, so save one. */
          asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,

          /* Get the return address into a temporary register. */
          thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
                         1 << LAST_ARG_REGNUM);

          /* Move the return address to lr. */
          asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
          /* Restore the low register. */
          asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
        regno = LAST_ARG_REGNUM;

      /* Remove the argument registers that were pushed onto the stack. */
      asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
                   SP_REGNUM, SP_REGNUM,
                   crtl->args.pretend_args_size);

      thumb_exit (asm_out_file, regno);
/* Functions to save and restore machine-specific function data. */
static struct machine_function *
arm_init_machine_status (void)
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;

/* Return an RTX indicating where the return address to the
   calling function can be found. */
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);

/* Do anything needed before RTL is emitted for each function. */
arm_init_expanders (void)
  /* Arrange to initialize and mark the machine per-function status. */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg. */
  /* ??? It is claimed that this should not be necessary. */
  mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);

/* Like arm_compute_initial_elimination_offset. Simpler because there
   isn't an ABI specified frame pointer for Thumb. Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated. */
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

    case ARG_POINTER_REGNUM:
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

          gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

          gcc_unreachable ();

      gcc_unreachable ();
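/* Illustrative sketch (not part of GCC): the elimination offsets above are
   just differences between points in the frame layout, which is ordered
   saved_args <= saved_regs <= soft_frame <= locals_base <= outgoing_args.
   A toy computation with hypothetical numbers (16 bytes of saved registers,
   soft frame at 40): eliminating ARG_POINTER to FRAME_POINTER yields
   soft_frame - saved_args. */
#if 0
struct toy_offsets { long saved_args, saved_regs, soft_frame,
                     locals_base, outgoing_args; };

static long
arg_to_frame_offset (const struct toy_offsets *o)
{
  return o->soft_frame - o->saved_args;   /* e.g. 40 - 0 = 40 bytes */
}
#endif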
/* Generate the rest of a function's prologue. */
thumb1_expand_prologue (void)
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  unsigned long func_type;
  unsigned long live_regs_mask;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues. */
  if (IS_NAKED (func_type))

  if (IS_INTERRUPT (func_type))
      error ("interrupt service routines cannot be coded in Thumb mode");

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  /* Load the pic register before setting the frame pointer,
     so we can use r7 as a temporary work register. */
  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    arm_load_pic_register (live_regs_mask);

  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
                    stack_pointer_rtx);

  if (flag_stack_usage_info)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  amount = offsets->outgoing_args - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, true);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    GEN_INT (- amount)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* The stack decrement is too big for an immediate value in a single
         insn. In theory we could issue multiple subtracts, but after
         three of them it becomes more space efficient to place the full
         value in the constant pool and load into a register. (Also the
         ARM debugger really likes to see only one stack decrement per
         function). So instead we look for a scratch register into which
         we can load the decrement, and then we subtract this from the
         stack pointer. Unfortunately on the thumb the only available
         scratch registers are the argument registers, and we cannot use
         these as they may hold arguments to the function. Instead we
         attempt to locate a call preserved register which is used by this
         function. If we can find one, then we know that it will have
         been pushed at the start of the prologue and so we can corrupt
         it now. */
      for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
        if (live_regs_mask & (1 << regno))

      gcc_assert (regno <= LAST_LO_REGNUM);

      reg = gen_rtx_REG (SImode, regno);

      emit_insn (gen_movsi (reg, GEN_INT (- amount)));

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
                                    stack_pointer_rtx, reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                           plus_constant (stack_pointer_rtx,
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

  if (frame_pointer_needed)
    thumb_set_frame_pointer (offsets);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount. Similarly if the user has requested no
     scheduling in the prolog. Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment. */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
  if (live_regs_mask & 0xff)
    cfun->machine->lr_save_eliminated = 0;
thumb1_expand_epilogue (void)
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;

  /* Naked functions don't have epilogues. */
  if (IS_NAKED (arm_current_func_type ()))

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
  amount -= 4 * thumb1_extra_regs_pushed (offsets, false);

  gcc_assert (amount >= 0);
      emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                             GEN_INT (amount)));
      /* r3 is always free in the epilogue. */
      rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

      emit_insn (gen_movsi (reg, GEN_INT (amount)));
      emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted. */
  emit_insn (gen_prologue_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct. */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
  arm_stack_offsets *offsets;
  unsigned long live_regs_mask = 0;
  unsigned long l_mask;
  unsigned high_regs_pushed = 0;
  int cfa_offset = 0;

  if (IS_NAKED (arm_current_func_type ()))

  if (is_called_in_ARM_mode (current_function_decl))
      gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
      gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
      name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

      /* Generate code sequence to switch us into Thumb mode. */
      /* The .code 32 directive has already been emitted by
         ASM_DECLARE_FUNCTION_NAME. */
      asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);

      /* Generate a label, so that the debugger will notice the
         change in instruction sets. This label is also used by
         the assembler to bypass the ARM code when this function
         is called from a Thumb encoded function elsewhere in the
         same file. Hence the definition of STUB_NAME here must
         agree with the definition in gas/config/tc-arm.c. */

#define STUB_NAME ".real_start_of"

      fprintf (f, "\t.code\t16\n");
      if (arm_dllexport_name_p (name))
        name = arm_strip_name_encoding (name);
      asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
      fprintf (f, "\t.thumb_func\n");
      asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

  if (crtl->args.pretend_args_size)
      /* Output unwind directive for the stack adjustment. */
      if (arm_except_unwind_info (&global_options) == UI_TARGET)
        fprintf (f, "\t.pad #%d\n",
                 crtl->args.pretend_args_size);

      if (cfun->machine->uses_anonymous_args)
          fprintf (f, "\tpush\t{");

          num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);

          for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
               regno <= LAST_ARG_REGNUM;
            asm_fprintf (f, "%r%s", regno,
                         regno == LAST_ARG_REGNUM ? "" : ", ");
          fprintf (f, "}\n");
        asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
                     SP_REGNUM, SP_REGNUM,
                     crtl->args.pretend_args_size);

      /* We don't need to record the stores for unwinding (would it
         help the debugger any if we did?), but record the change in
         the stack pointer. */
      if (dwarf2out_do_frame ())
          char *l = dwarf2out_cfi_label (false);

          cfa_offset = cfa_offset + crtl->args.pretend_args_size;
          dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);

  /* Get the registers we are going to push. */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;
  /* Extract a mask of the ones we can give to the Thumb's push instruction. */
  l_mask = live_regs_mask & 0x40ff;
  /* Then count how many other high registers will need to be pushed. */
  high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
  if (TARGET_BACKTRACE)
      unsigned work_register;

      /* We have been asked to create a stack backtrace structure.
         The code looks like this:

         0   sub  SP, #16         Reserve space for 4 registers.
         2   push {R7}            Push low registers.
         4   add  R7, SP, #20     Get the stack pointer before the push.
         6   str  R7, [SP, #8]    Store the stack pointer (before reserving the space).
         8   mov  R7, PC          Get hold of the start of this code plus 12.
         10  str  R7, [SP, #16]   Store it.
         12  mov  R7, FP          Get hold of the current frame pointer.
         14  str  R7, [SP, #4]    Store it.
         16  mov  R7, LR          Get hold of the current return address.
         18  str  R7, [SP, #12]   Store it.
         20  add  R7, SP, #16     Point at the start of the backtrace structure.
         22  mov  FP, R7          Put this value into the frame pointer. */

      work_register = thumb_find_work_register (live_regs_mask);

      if (arm_except_unwind_info (&global_options) == UI_TARGET)
        asm_fprintf (f, "\t.pad #16\n");

      asm_fprintf
        (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
         SP_REGNUM, SP_REGNUM);

      if (dwarf2out_do_frame ())
          char *l = dwarf2out_cfi_label (false);

          cfa_offset = cfa_offset + 16;
          dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);

      thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
      offset = bit_count (l_mask) * UNITS_PER_WORD;

      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
                   offset + 16 + crtl->args.pretend_args_size);

      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,

      /* Make sure that the instruction fetching the PC is in the right place
         to calculate "start of backtrace creation code + 12". */
      asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
      asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
                   ARM_HARD_FRAME_POINTER_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,

      asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
                   ARM_HARD_FRAME_POINTER_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
      asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,

      asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
      asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
      asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
      asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
                   ARM_HARD_FRAME_POINTER_REGNUM, work_register);

  /* Optimization: If we are not pushing any low registers but we are going
     to push some high registers then delay our first push. This will just
     be a push of LR and we can combine it with the push of the first high
     register. */
  else if ((l_mask & 0xff) != 0
           || (high_regs_pushed == 0 && l_mask))
      unsigned long mask = l_mask;
      mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
      thumb_pushpop (f, mask, 1, &cfa_offset, mask);
  if (high_regs_pushed)
      unsigned pushable_regs;
      unsigned next_hi_reg;

      for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
        if (live_regs_mask & (1 << next_hi_reg))

      pushable_regs = l_mask & 0xff;

      if (pushable_regs == 0)
        pushable_regs = 1 << thumb_find_work_register (live_regs_mask);

      while (high_regs_pushed > 0)
          unsigned long real_regs_mask = 0;

          for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
              if (pushable_regs & (1 << regno))
                  asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);

                  high_regs_pushed--;
                  real_regs_mask |= (1 << next_hi_reg);

                  if (high_regs_pushed)
                      for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
                        if (live_regs_mask & (1 << next_hi_reg))
                      pushable_regs &= ~((1 << regno) - 1);

          /* If we had to find a work register and we have not yet
             saved the LR then add it to the list of regs to push. */
          if (l_mask == (1 << LR_REGNUM))
              thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
                             1, &cfa_offset,
                             real_regs_mask | (1 << LR_REGNUM));
            thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
/* Handle the case of a double word load into a low register from
   a computed memory address. The computed address may involve a
   register which is overwritten by the load. */
thumb_load_double_from_address (rtx *operands)
  gcc_assert (GET_CODE (operands[0]) == REG);
  gcc_assert (GET_CODE (operands[1]) == MEM);

  /* Get the memory address. */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed. */
  switch (GET_CODE (addr))
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
          output_asm_insn ("ldr\t%H0, %2", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%0, %1", operands);
          output_asm_insn ("ldr\t%H0, %2", operands);

      /* Compute <address> + 4 for the high order load. */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);

      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
        base = arg2, offset = arg1;
        base = arg1, offset = arg2;

      gcc_assert (GET_CODE (base) == REG);

      /* Catch the case of <address> = <reg> + <reg> */
      if (GET_CODE (offset) == REG)
          int reg_offset = REGNO (offset);
          int reg_base = REGNO (base);
          int reg_dest = REGNO (operands[0]);

          /* Add the base and offset registers together into the
             higher destination register. */
          asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
                       reg_dest + 1, reg_base, reg_offset);

          /* Load the lower destination register from the address in
             the higher destination register. */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
                       reg_dest, reg_dest + 1);

          /* Load the higher destination register from its own address
             plus 4. */
          asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
                       reg_dest + 1, reg_dest + 1);

          /* Compute <address> + 4 for the high order load. */
          operands[2] = adjust_address (operands[1], SImode, 4);

          /* If the computed address is held in the low order register
             then load the high order register first, otherwise always
             load the low order register first. */
          if (REGNO (operands[0]) == REGNO (base))
              output_asm_insn ("ldr\t%H0, %2", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%0, %1", operands);
              output_asm_insn ("ldr\t%H0, %2", operands);

      /* With no registers to worry about we can just load the value
         directly. */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);

      gcc_unreachable ();
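/* Illustrative sketch (not part of GCC): the hazard being avoided is the
   base register of the address doubling as half of the destination pair.
   Loading r0:r1 from the address in r0 must fetch the word that overwrites
   r0 last:

       @ DImode load into r0/r1 from address held in r0
       ldr  r1, [r0, #4]   @ high word first: r0 still holds the address
       ldr  r0, [r0, #0]   @ now it is safe to clobber r0

   whereas the opposite order would compute the second address from an
   already-overwritten register. */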
thumb_output_move_mem_multiple (int n, rtx *operands)
  if (REGNO (operands[4]) > REGNO (operands[5]))
      operands[4] = operands[5];

  output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
  output_asm_insn ("stmia\t%0!, {%4, %5}", operands);

  if (REGNO (operands[4]) > REGNO (operands[5]))
      operands[4] = operands[5];
  if (REGNO (operands[5]) > REGNO (operands[6]))
      operands[5] = operands[6];
  if (REGNO (operands[4]) > REGNO (operands[5]))
      operands[4] = operands[5];

  output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
  output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);

  gcc_unreachable ();

/* Output a call-via instruction for thumb state. */
thumb_call_via_reg (rtx reg)
  int regno = REGNO (reg);

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit. If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability. */
  if (in_section == text_section)
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;

  output_asm_insn ("bl\t%a0", labelp);
/* Routines for generating rtl. */
thumb_expand_movmemqi (rtx *operands)
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

      emit_insn (gen_movmem12b (out, in, out, in));

      emit_insn (gen_movmem8b (out, in, out, in));

      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));

      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),

      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),

thumb_reload_out_hi (rtx *operands)
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));

/* Handle reading a half-word from memory during reload. */
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
  gcc_unreachable ();
/* Return the length of a function name prefix
   that starts with the character 'c'. */
arm_get_strip_length (int c)
    ARM_NAME_ENCODING_LENGTHS

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it. */
arm_strip_name_encoding (const char *name)
  while ((skip = arm_get_strip_length (* name)))

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used. */
arm_asm_output_labelref (FILE *stream, const char *name)
  while ((skip = arm_get_strip_length (* name)))
      verbatim |= (*name == '*');

    fputs (name, stream);
    asm_fprintf (stream, "%U%s", name);
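/* Illustrative sketch (not part of GCC): the strip loop advances past any
   recognized single-character prefixes, remembering whether a '*' (emit
   verbatim) was among them. A self-contained model with a hypothetical
   prefix set -- the real set comes from ARM_NAME_ENCODING_LENGTHS: */
#if 0
static const char *
strip_prefixes (const char *name, int *verbatim)
{
  /* Pretend '*' and '@' are 1-character encodings.  */
  *verbatim = 0;
  while (*name == '*' || *name == '@')
    {
      *verbatim |= (*name == '*');
      name++;                      /* skip = 1 for both prefixes */
    }
  return name;   /* "*@foo" -> "foo" with *verbatim == 1 */
}
#endif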
arm_file_start (void)
  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

      const char *fpu_name;
      if (arm_selected_arch)
        asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
        asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);

      if (TARGET_SOFT_FLOAT)
            fpu_name = "softvfp";
            fpu_name = "softfpa";
          fpu_name = arm_fpu_desc->name;
          if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
              if (TARGET_HARD_FLOAT)
                asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
              if (TARGET_HARD_FLOAT_ABI)
                asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
         are used. However we don't have any easy way of figuring this out.
         Conservatively record the setting that would have been used. */

      /* Tag_ABI_FP_rounding. */
      if (flag_rounding_math)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
      if (!flag_unsafe_math_optimizations)
          /* Tag_ABI_FP_denormal. */
          asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
          /* Tag_ABI_FP_exceptions. */
          asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");

      /* Tag_ABI_FP_user_exceptions. */
      if (flag_signaling_nans)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
      /* Tag_ABI_FP_number_model. */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
                   flag_finite_math_only ? 1 : 3);

      /* Tag_ABI_align8_needed. */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
      /* Tag_ABI_align8_preserved. */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
      /* Tag_ABI_enum_size. */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
                   flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals. */
      else if (optimize >= 2)
      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);

      /* Tag_ABI_FP_16bit_format. */
      if (arm_fp16_format)
        asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
                     (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
        arm_lang_output_object_attributes_hook ();

  default_file_start ();
arm_file_end (void)
  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack. */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
      rtx label = thumb_call_via_label[regno];

          targetm.asm_out.internal_label (asm_out_file, "L",
                                          CODE_LABEL_NUMBER (label));
          asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);

/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory. Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification. */
arm_encode_section_info (tree decl, rtx rtl, int first)
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
#endif /* !ARM_PE */
21679 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21681 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21682 && !strcmp (prefix, "L"))
21684 arm_ccfsm_state = 0;
21685 arm_target_insn = NULL;
21687 default_internal_label (stream, prefix, labelno);
21690 /* Output code to add DELTA to the first argument, and then jump
21691 to FUNCTION. Used for C++ multiple inheritance. */
21693 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21694 HOST_WIDE_INT delta,
21695 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21698 static int thunk_label = 0;
21701 int mi_delta = delta;
21702 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21704 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21707 mi_delta = - mi_delta;
21711 int labelno = thunk_label++;
21712 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21713 /* Thunks are entered in arm mode when avaiable. */
21714 if (TARGET_THUMB1_ONLY)
21716 /* push r3 so we can use it as a temporary. */
21717 /* TODO: Omit this save if r3 is not used. */
21718 fputs ("\tpush {r3}\n", file);
21719 fputs ("\tldr\tr3, ", file);
21723 fputs ("\tldr\tr12, ", file);
21725 assemble_name (file, label);
21726 fputc ('\n', file);
21729 /* If we are generating PIC, the ldr instruction below loads
21730 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21731 the address of the add + 8, so we have:
21733 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21736 Note that we have "+ 1" because some versions of GNU ld
21737 don't set the low bit of the result for R_ARM_REL32
21738 relocations against thumb function symbols.
21739 On ARMv6M this is +4, not +8. */
21740 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21741 assemble_name (file, labelpc);
21742 fputs (":\n", file);
21743 if (TARGET_THUMB1_ONLY)
21745 /* This is 2 insns after the start of the thunk, so we know it
21746 is 4-byte aligned. */
21747 fputs ("\tadd\tr3, pc, r3\n", file);
21748 fputs ("\tmov r12, r3\n", file);
21751 fputs ("\tadd\tr12, pc, r12\n", file);
21753 else if (TARGET_THUMB1_ONLY)
21754 fputs ("\tmov r12, r3\n", file);
21756 if (TARGET_THUMB1_ONLY)
21758 if (mi_delta > 255)
21760 fputs ("\tldr\tr3, ", file);
21761 assemble_name (file, label);
21762 fputs ("+4\n", file);
21763 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21764 mi_op, this_regno, this_regno);
21766 else if (mi_delta != 0)
21768 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21769 mi_op, this_regno, this_regno,
21775 /* TODO: Use movw/movt for large constants when available. */
21776 while (mi_delta != 0)
21778 if ((mi_delta & (3 << shift)) == 0)
21779 shift += 2;
21780 else
21782 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21783 mi_op, this_regno, this_regno,
21784 mi_delta & (0xff << shift));
21785 mi_delta &= ~(0xff << shift);
21786 shift += 8;
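/* As an illustration (destination register chosen arbitrarily): a delta
   of 0x10444 is not a valid ARM immediate, so the loop above splits it
   into 8-bit chunks at even rotations and emits

	add	r0, r0, #0x44
	add	r0, r0, #0x10400

   each operand being encodable in a single instruction.  */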
21792 if (TARGET_THUMB1_ONLY)
21793 fputs ("\tpop\t{r3}\n", file);
21795 fprintf (file, "\tbx\tr12\n");
21796 ASM_OUTPUT_ALIGN (file, 2);
21797 assemble_name (file, label);
21798 fputs (":\n", file);
21801 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21802 rtx tem = XEXP (DECL_RTL (function), 0);
21803 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21804 tem = gen_rtx_MINUS (GET_MODE (tem),
21805 tem,
21806 gen_rtx_SYMBOL_REF (Pmode,
21807 ggc_strdup (labelpc)));
21808 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21811 /* Output ".word .LTHUNKn". */
21812 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21814 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21815 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21819 fputs ("\tb\t", file);
21820 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21821 if (NEED_PLT_RELOC)
21822 fputs ("(PLT)", file);
21823 fputc ('\n', file);
21828 arm_emit_vector_const (FILE *file, rtx x)
21830 int i;
21831 const char * pattern;
21833 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21835 switch (GET_MODE (x))
21837 case V2SImode: pattern = "%08x"; break;
21838 case V4HImode: pattern = "%04x"; break;
21839 case V8QImode: pattern = "%02x"; break;
21840 default: gcc_unreachable ();
21843 fprintf (file, "0x");
21844 for (i = CONST_VECTOR_NUNITS (x); i--;)
21846 rtx element;
21848 element = CONST_VECTOR_ELT (x, i);
21849 fprintf (file, pattern, INTVAL (element));
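/* For example, a V4HImode constant whose elements are {1, 2, 3, 4}
   (element 0 first) is printed most-significant element first, as the
   single literal 0x0004000300020001.  */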
21855 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21856 HFmode constant pool entries are actually loaded with ldr. */
21858 arm_emit_fp16_const (rtx c)
21860 REAL_VALUE_TYPE r;
21861 long bits;
21863 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21864 bits = real_to_target (NULL, &r, HFmode);
21865 if (WORDS_BIG_ENDIAN)
21866 assemble_zeros (2);
21867 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21868 if (!WORDS_BIG_ENDIAN)
21869 assemble_zeros (2);
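/* For instance, the HFmode constant 1.0 has the bit pattern 0x3c00.
   On a little-endian target the word emitted is 00 3c 00 00 (value
   first, then padding); on a big-endian target it is 00 00 3c 00.  */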
21873 arm_output_load_gr (rtx *operands)
21880 if (GET_CODE (operands [1]) != MEM
21881 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21882 || GET_CODE (reg = XEXP (sum, 0)) != REG
21883 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21884 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21885 return "wldrw%?\t%0, %1";
21887 /* Fix up an out-of-range load of a GR register. */
21888 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21889 wcgr = operands[0];
21891 output_asm_insn ("ldr%?\t%0, %1", operands);
21893 operands[0] = wcgr;
21895 output_asm_insn ("tmcr%?\t%0, %1", operands);
21896 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21901 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21903 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21904 named arg and all anonymous args onto the stack.
21905 XXX I know the prologue shouldn't be pushing registers, but it is faster
21906 that way. */
21909 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21910 enum machine_mode mode,
21911 tree type,
21912 int *pretend_size,
21913 int second_time ATTRIBUTE_UNUSED)
21917 cfun->machine->uses_anonymous_args = 1;
21918 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21920 nregs = pcum->aapcs_ncrn;
21921 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21922 nregs++;
21924 else
21925 nregs = pcum->nregs;
21927 if (nregs < NUM_ARG_REGS)
21928 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
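/* Example: for "int f (int fmt, ...)" only r0 is consumed by the named
   argument, so nregs == 1 and *pretend_size becomes (4 - 1) * 4 == 12
   bytes, making the prologue push r1-r3 so the anonymous arguments are
   contiguous with any stacked ones.  */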
21931 /* Return nonzero if the CONSUMER instruction (a store) does not need
21932 PRODUCER's value to calculate the address. */
21935 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21937 rtx value = PATTERN (producer);
21938 rtx addr = PATTERN (consumer);
21940 if (GET_CODE (value) == COND_EXEC)
21941 value = COND_EXEC_CODE (value);
21942 if (GET_CODE (value) == PARALLEL)
21943 value = XVECEXP (value, 0, 0);
21944 value = XEXP (value, 0);
21945 if (GET_CODE (addr) == COND_EXEC)
21946 addr = COND_EXEC_CODE (addr);
21947 if (GET_CODE (addr) == PARALLEL)
21948 addr = XVECEXP (addr, 0, 0);
21949 addr = XEXP (addr, 0);
21951 return !reg_overlap_mentioned_p (value, addr);
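/* A sketch of the intent: with PRODUCER (set (reg r1) ...) and CONSUMER
   (set (mem (plus (reg r2) (const_int 4))) (reg r1)), the stored value
   depends on r1 but the address uses only r2, so this returns nonzero
   (no early address dependency).  */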
21954 /* Return nonzero if the CONSUMER instruction (a store) does need
21955 PRODUCER's value to calculate the address. */
21958 arm_early_store_addr_dep (rtx producer, rtx consumer)
21960 return !arm_no_early_store_addr_dep (producer, consumer);
21963 /* Return nonzero if the CONSUMER instruction (a load) does need
21964 PRODUCER's value to calculate the address. */
21967 arm_early_load_addr_dep (rtx producer, rtx consumer)
21969 rtx value = PATTERN (producer);
21970 rtx addr = PATTERN (consumer);
21972 if (GET_CODE (value) == COND_EXEC)
21973 value = COND_EXEC_CODE (value);
21974 if (GET_CODE (value) == PARALLEL)
21975 value = XVECEXP (value, 0, 0);
21976 value = XEXP (value, 0);
21977 if (GET_CODE (addr) == COND_EXEC)
21978 addr = COND_EXEC_CODE (addr);
21979 if (GET_CODE (addr) == PARALLEL)
21980 addr = XVECEXP (addr, 0, 0);
21981 addr = XEXP (addr, 1);
21983 return reg_overlap_mentioned_p (value, addr);
21986 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21987 have an early register shift value or amount dependency on the
21988 result of PRODUCER. */
21991 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21993 rtx value = PATTERN (producer);
21994 rtx op = PATTERN (consumer);
21997 if (GET_CODE (value) == COND_EXEC)
21998 value = COND_EXEC_CODE (value);
21999 if (GET_CODE (value) == PARALLEL)
22000 value = XVECEXP (value, 0, 0);
22001 value = XEXP (value, 0);
22002 if (GET_CODE (op) == COND_EXEC)
22003 op = COND_EXEC_CODE (op);
22004 if (GET_CODE (op) == PARALLEL)
22005 op = XVECEXP (op, 0, 0);
22008 early_op = XEXP (op, 0);
22009 /* This is either an actual independent shift, or a shift applied to
22010 the first operand of another operation. We want the whole shift
22011 operation. */
22012 if (GET_CODE (early_op) == REG)
22013 early_op = op;
22015 return !reg_overlap_mentioned_p (value, early_op);
22018 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22019 have an early register shift value dependency on the result of
22020 PRODUCER. */
22023 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
22025 rtx value = PATTERN (producer);
22026 rtx op = PATTERN (consumer);
22029 if (GET_CODE (value) == COND_EXEC)
22030 value = COND_EXEC_CODE (value);
22031 if (GET_CODE (value) == PARALLEL)
22032 value = XVECEXP (value, 0, 0);
22033 value = XEXP (value, 0);
22034 if (GET_CODE (op) == COND_EXEC)
22035 op = COND_EXEC_CODE (op);
22036 if (GET_CODE (op) == PARALLEL)
22037 op = XVECEXP (op, 0, 0);
22040 early_op = XEXP (op, 0);
22042 /* This is either an actual independent shift, or a shift applied to
22043 the first operand of another operation. We want the value being
22044 shifted, in either case. */
22045 if (GET_CODE (early_op) != REG)
22046 early_op = XEXP (early_op, 0);
22048 return !reg_overlap_mentioned_p (value, early_op);
22051 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22052 have an early register mult dependency on the result of
22053 PRODUCER. */
22056 arm_no_early_mul_dep (rtx producer, rtx consumer)
22058 rtx value = PATTERN (producer);
22059 rtx op = PATTERN (consumer);
22061 if (GET_CODE (value) == COND_EXEC)
22062 value = COND_EXEC_CODE (value);
22063 if (GET_CODE (value) == PARALLEL)
22064 value = XVECEXP (value, 0, 0);
22065 value = XEXP (value, 0);
22066 if (GET_CODE (op) == COND_EXEC)
22067 op = COND_EXEC_CODE (op);
22068 if (GET_CODE (op) == PARALLEL)
22069 op = XVECEXP (op, 0, 0);
22072 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22074 if (GET_CODE (XEXP (op, 0)) == MULT)
22075 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22077 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22083 /* We can't rely on the caller doing the proper promotion when
22084 using APCS or ATPCS. */
22087 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22089 return !TARGET_AAPCS_BASED;
22092 static enum machine_mode
22093 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22094 enum machine_mode mode,
22095 int *punsignedp ATTRIBUTE_UNUSED,
22096 const_tree fntype ATTRIBUTE_UNUSED,
22097 int for_return ATTRIBUTE_UNUSED)
22099 if (GET_MODE_CLASS (mode) == MODE_INT
22100 && GET_MODE_SIZE (mode) < 4)
22101 return SImode;
22103 return mode;
22106 /* AAPCS based ABIs use short enums by default. */
22109 arm_default_short_enums (void)
22111 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22115 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22118 arm_align_anon_bitfield (void)
22120 return TARGET_AAPCS_BASED;
22124 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22127 arm_cxx_guard_type (void)
22129 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22132 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22133 has an accumulator dependency on the result of the producer (a
22134 multiplication instruction) and no other dependency on that result. */
22136 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22138 rtx mul = PATTERN (producer);
22139 rtx mac = PATTERN (consumer);
22141 rtx mac_op0, mac_op1, mac_acc;
22143 if (GET_CODE (mul) == COND_EXEC)
22144 mul = COND_EXEC_CODE (mul);
22145 if (GET_CODE (mac) == COND_EXEC)
22146 mac = COND_EXEC_CODE (mac);
22148 /* Check that mul is of the form (set (...) (mult ...))
22149 and mla is of the form (set (...) (plus (mult ...) (...))). */
22150 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22151 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22152 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22155 mul_result = XEXP (mul, 0);
22156 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22157 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22158 mac_acc = XEXP (XEXP (mac, 1), 1);
22160 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22161 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22162 && !reg_overlap_mentioned_p (mul_result, mac_op1));
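/* Sketch: with PRODUCER (set (reg r4) (mult (reg r1) (reg r2))) and
   CONSUMER (set (reg r5) (plus (mult (reg r6) (reg r7)) (reg r4))),
   the multiply result r4 feeds only the accumulator operand, so the
   function returns true.  */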
22166 /* The EABI says test the least significant bit of a guard variable. */
22169 arm_cxx_guard_mask_bit (void)
22171 return TARGET_AAPCS_BASED;
22175 /* The EABI specifies that all array cookies are 8 bytes long. */
22178 arm_get_cookie_size (tree type)
22182 if (!TARGET_AAPCS_BASED)
22183 return default_cxx_get_cookie_size (type);
22185 size = build_int_cst (sizetype, 8);
22190 /* The EABI says that array cookies should also contain the element size. */
22193 arm_cookie_has_size (void)
22195 return TARGET_AAPCS_BASED;
22199 /* The EABI says constructors and destructors should return a pointer to
22200 the object constructed/destroyed. */
22203 arm_cxx_cdtor_returns_this (void)
22205 return TARGET_AAPCS_BASED;
22208 /* The EABI says that an inline function may never be the key
22209 method. */
22212 arm_cxx_key_method_may_be_inline (void)
22214 return !TARGET_AAPCS_BASED;
22218 arm_cxx_determine_class_data_visibility (tree decl)
22220 if (!TARGET_AAPCS_BASED
22221 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22224 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22225 is exported. However, on systems without dynamic vague linkage,
22226 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22227 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22228 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22230 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22231 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22235 arm_cxx_class_data_always_comdat (void)
22237 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22238 vague linkage if the class has no key function. */
22239 return !TARGET_AAPCS_BASED;
22243 /* The EABI says __aeabi_atexit should be used to register static
22244 destructors. */
22247 arm_cxx_use_aeabi_atexit (void)
22249 return TARGET_AAPCS_BASED;
22254 arm_set_return_address (rtx source, rtx scratch)
22256 arm_stack_offsets *offsets;
22257 HOST_WIDE_INT delta;
22259 unsigned long saved_regs;
22261 offsets = arm_get_frame_offsets ();
22262 saved_regs = offsets->saved_regs_mask;
22264 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22265 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22268 if (frame_pointer_needed)
22269 addr = plus_constant(hard_frame_pointer_rtx, -4);
22272 /* LR will be the first saved register. */
22273 delta = offsets->outgoing_args - (offsets->frame + 4);
22278 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22279 GEN_INT (delta & ~4095)));
22284 addr = stack_pointer_rtx;
22286 addr = plus_constant (addr, delta);
22288 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22294 thumb_set_return_address (rtx source, rtx scratch)
22296 arm_stack_offsets *offsets;
22297 HOST_WIDE_INT delta;
22298 HOST_WIDE_INT limit;
22301 unsigned long mask;
22305 offsets = arm_get_frame_offsets ();
22306 mask = offsets->saved_regs_mask;
22307 if (mask & (1 << LR_REGNUM))
22310 /* Find the saved regs. */
22311 if (frame_pointer_needed)
22313 delta = offsets->soft_frame - offsets->saved_args;
22314 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22320 delta = offsets->outgoing_args - offsets->saved_args;
22323 /* Allow for the stack frame. */
22324 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22326 /* The link register is always the first saved register. */
22329 /* Construct the address. */
22330 addr = gen_rtx_REG (SImode, reg);
22333 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22334 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22338 addr = plus_constant (addr, delta);
22340 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22343 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22346 /* Implements target hook vector_mode_supported_p. */
22348 arm_vector_mode_supported_p (enum machine_mode mode)
22350 /* Neon also supports V2SImode, etc. listed in the clause below. */
22351 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22352 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22355 if ((TARGET_NEON || TARGET_IWMMXT)
22356 && ((mode == V2SImode)
22357 || (mode == V4HImode)
22358 || (mode == V8QImode)))
22364 /* Implements target hook array_mode_supported_p. */
22367 arm_array_mode_supported_p (enum machine_mode mode,
22368 unsigned HOST_WIDE_INT nelems)
22371 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
22372 && (nelems >= 2 && nelems <= 4))
22378 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22379 registers when autovectorizing for Neon, at least until multiple vector
22380 widths are supported properly by the middle-end. */
22382 static enum machine_mode
22383 arm_preferred_simd_mode (enum machine_mode mode)
22389 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22391 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22393 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22395 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22397 if (TARGET_NEON_VECTORIZE_QUAD)
22404 if (TARGET_REALLY_IWMMXT)
22420 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22422 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22423 using r0-r4 for function arguments, r7 for the stack frame and don't have
22424 enough left over to do doubleword arithmetic. For Thumb-2 all the
22425 potentially problematic instructions accept high registers so this is not
22426 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22427 that require many low registers. */
22429 arm_class_likely_spilled_p (reg_class_t rclass)
22431 if ((TARGET_THUMB1 && rclass == LO_REGS)
22432 || rclass == CC_REG)
22438 /* Implements target hook small_register_classes_for_mode_p. */
22440 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22442 return TARGET_THUMB1;
22445 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22446 ARM insns and therefore guarantee that the shift count is modulo 256.
22447 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22448 guarantee no particular behavior for out-of-range counts. */
22450 static unsigned HOST_WIDE_INT
22451 arm_shift_truncation_mask (enum machine_mode mode)
22453 return mode == SImode ? 255 : 0;
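/* So for SImode the middle end may drop an explicit masking of the
   count, rewriting e.g. (ashift x (and n 255)) as (ashift x n); for
   DImode the mask is 0 and no truncation is assumed.  */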
22457 /* Map internal gcc register numbers to DWARF2 register numbers. */
22460 arm_dbx_register_number (unsigned int regno)
22465 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22466 compatibility. The EABI defines them as registers 96-103. */
22467 if (IS_FPA_REGNUM (regno))
22468 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22470 if (IS_VFP_REGNUM (regno))
22472 /* See comment in arm_dwarf_register_span. */
22473 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22474 return 64 + regno - FIRST_VFP_REGNUM;
22476 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22479 if (IS_IWMMXT_GR_REGNUM (regno))
22480 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22482 if (IS_IWMMXT_REGNUM (regno))
22483 return 112 + regno - FIRST_IWMMXT_REGNUM;
22485 gcc_unreachable ();
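/* Examples of the resulting numbering: s5 maps to DWARF register
   64 + 5 = 69, while d16 (which has no single-precision view) maps to
   256 + 16 = 272, matching the EABI's D0-D31 range at 256-287.  */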
22488 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22489 GCC models them as 64 32-bit registers, so we need to describe this to
22490 the DWARF generation code. Other registers can use the default. */
22492 arm_dwarf_register_span (rtx rtl)
22499 regno = REGNO (rtl);
22500 if (!IS_VFP_REGNUM (regno))
22503 /* XXX FIXME: The EABI defines two VFP register ranges:
22504 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22505 256-287: D0-D31
22506 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22507 corresponding D register. Until GDB supports this, we shall use the
22508 legacy encodings. We also use these encodings for D0-D15 for
22509 compatibility with older debuggers. */
22510 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22511 return NULL_RTX;
22513 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22514 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22515 regno = (regno - FIRST_VFP_REGNUM) / 2;
22516 for (i = 0; i < nregs; i++)
22517 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
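/* For example (assuming a 16-byte value living in d20/d21): nregs is
   16 / 8 == 2 and the span built is
   (parallel [(reg:DI 276) (reg:DI 277)]), i.e. the DWARF registers
   for d20 and d21.  */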
22522 #if ARM_UNWIND_INFO
22523 /* Emit unwind directives for a store-multiple instruction or stack pointer
22524 push during alignment.
22525 These should only ever be generated by the function prologue code, so
22526 expect them to have a particular form. */
22529 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22532 HOST_WIDE_INT offset;
22533 HOST_WIDE_INT nregs;
22539 e = XVECEXP (p, 0, 0);
22540 if (GET_CODE (e) != SET)
22543 /* First insn will adjust the stack pointer. */
22544 if (GET_CODE (e) != SET
22545 || GET_CODE (XEXP (e, 0)) != REG
22546 || REGNO (XEXP (e, 0)) != SP_REGNUM
22547 || GET_CODE (XEXP (e, 1)) != PLUS)
22550 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22551 nregs = XVECLEN (p, 0) - 1;
22553 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22556 /* The function prologue may also push pc, but not annotate it as it is
22557 never restored. We turn this into a stack pointer adjustment. */
22558 if (nregs * 4 == offset - 4)
22560 fprintf (asm_out_file, "\t.pad #4\n");
22561 offset -= 4;
22562 nregs--;
22564 fprintf (asm_out_file, "\t.save {");
22566 else if (IS_VFP_REGNUM (reg))
22569 fprintf (asm_out_file, "\t.vsave {");
22571 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22573 /* FPA registers are done differently. */
22574 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22578 /* Unknown register type. */
22581 /* If the stack increment doesn't match the size of the saved registers,
22582 something has gone horribly wrong. */
22583 if (offset != nregs * reg_size)
22588 /* The remaining insns will describe the stores. */
22589 for (i = 1; i <= nregs; i++)
22591 /* Expect (set (mem <addr>) (reg)).
22592 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22593 e = XVECEXP (p, 0, i);
22594 if (GET_CODE (e) != SET
22595 || GET_CODE (XEXP (e, 0)) != MEM
22596 || GET_CODE (XEXP (e, 1)) != REG)
22599 reg = REGNO (XEXP (e, 1));
22604 fprintf (asm_out_file, ", ");
22605 /* We can't use %r for vfp because we need to use the
22606 double precision register names. */
22607 if (IS_VFP_REGNUM (reg))
22608 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22610 asm_fprintf (asm_out_file, "%r", reg);
22612 #ifdef ENABLE_CHECKING
22613 /* Check that the addresses are consecutive. */
22614 e = XEXP (XEXP (e, 0), 0);
22615 if (GET_CODE (e) == PLUS)
22617 offset += reg_size;
22618 if (GET_CODE (XEXP (e, 0)) != REG
22619 || REGNO (XEXP (e, 0)) != SP_REGNUM
22620 || GET_CODE (XEXP (e, 1)) != CONST_INT
22621 || offset != INTVAL (XEXP (e, 1)))
22625 || GET_CODE (e) != REG
22626 || REGNO (e) != SP_REGNUM)
22630 fprintf (asm_out_file, "}\n");
22633 /* Emit unwind directives for a SET. */
22636 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22644 switch (GET_CODE (e0))
22647 /* Pushing a single register. */
22648 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22649 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22650 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22653 asm_fprintf (asm_out_file, "\t.save ");
22654 if (IS_VFP_REGNUM (REGNO (e1)))
22655 asm_fprintf(asm_out_file, "{d%d}\n",
22656 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22658 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22662 if (REGNO (e0) == SP_REGNUM)
22664 /* A stack increment. */
22665 if (GET_CODE (e1) != PLUS
22666 || GET_CODE (XEXP (e1, 0)) != REG
22667 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22668 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22671 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22672 -INTVAL (XEXP (e1, 1)));
22674 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22676 HOST_WIDE_INT offset;
22678 if (GET_CODE (e1) == PLUS)
22680 if (GET_CODE (XEXP (e1, 0)) != REG
22681 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22683 reg = REGNO (XEXP (e1, 0));
22684 offset = INTVAL (XEXP (e1, 1));
22685 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22686 HARD_FRAME_POINTER_REGNUM, reg,
22689 else if (GET_CODE (e1) == REG)
22692 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22693 HARD_FRAME_POINTER_REGNUM, reg);
22698 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22700 /* Move from sp to reg. */
22701 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22703 else if (GET_CODE (e1) == PLUS
22704 && GET_CODE (XEXP (e1, 0)) == REG
22705 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22706 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22708 /* Set reg to offset from sp. */
22709 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22710 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22712 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22714 /* Stack pointer save before alignment. */
22716 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22729 /* Emit unwind directives for the given insn. */
22732 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22736 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22739 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22740 && (TREE_NOTHROW (current_function_decl)
22741 || crtl->all_throwers_are_sibcalls))
22744 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22747 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22749 pat = XEXP (pat, 0);
22751 pat = PATTERN (insn);
22753 switch (GET_CODE (pat))
22756 arm_unwind_emit_set (asm_out_file, pat);
22760 /* Store multiple. */
22761 arm_unwind_emit_sequence (asm_out_file, pat);
22770 /* Output a reference from a function exception table to the type_info
22771 object X. The EABI specifies that the symbol should be relocated by
22772 an R_ARM_TARGET2 relocation. */
22775 arm_output_ttype (rtx x)
22777 fputs ("\t.word\t", asm_out_file);
22778 output_addr_const (asm_out_file, x);
22779 /* Use special relocations for symbol references. */
22780 if (GET_CODE (x) != CONST_INT)
22781 fputs ("(TARGET2)", asm_out_file);
22782 fputc ('\n', asm_out_file);
22787 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22790 arm_asm_emit_except_personality (rtx personality)
22792 fputs ("\t.personality\t", asm_out_file);
22793 output_addr_const (asm_out_file, personality);
22794 fputc ('\n', asm_out_file);
22797 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22800 arm_asm_init_sections (void)
22802 exception_section = get_unnamed_section (0, output_section_asm_op,
22805 #endif /* ARM_UNWIND_INFO */
22807 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22809 static enum unwind_info_type
22810 arm_except_unwind_info (struct gcc_options *opts)
22812 /* Honor the --enable-sjlj-exceptions configure switch. */
22813 #ifdef CONFIG_SJLJ_EXCEPTIONS
22814 if (CONFIG_SJLJ_EXCEPTIONS)
22818 /* If not using ARM EABI unwind tables... */
22819 if (ARM_UNWIND_INFO)
22821 /* For simplicity elsewhere in this file, indicate that all unwind
22822 info is disabled if we're not emitting unwind tables. */
22823 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22824 return UI_NONE;
22825 else
22826 return UI_TARGET;
22829 /* ... we use sjlj exceptions for backwards compatibility. */
22830 return UI_SJLJ;
22834 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22835 stack alignment. */
22838 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22840 rtx unspec = SET_SRC (pattern);
22841 gcc_assert (GET_CODE (unspec) == UNSPEC);
22845 case UNSPEC_STACK_ALIGN:
22846 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22847 put anything on the stack, so hopefully it won't matter.
22848 CFA = SP will be correct after alignment. */
22849 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22850 SET_DEST (pattern));
22853 gcc_unreachable ();
22858 /* Output unwind directives for the start/end of a function. */
22861 arm_output_fn_unwind (FILE * f, bool prologue)
22863 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22867 fputs ("\t.fnstart\n", f);
22870 /* If this function will never be unwound, then mark it as such.
22871 The same condition is used in arm_unwind_emit to suppress
22872 the frame annotations. */
22873 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22874 && (TREE_NOTHROW (current_function_decl)
22875 || crtl->all_throwers_are_sibcalls))
22876 fputs("\t.cantunwind\n", f);
22878 fputs ("\t.fnend\n", f);
22883 arm_emit_tls_decoration (FILE *fp, rtx x)
22885 enum tls_reloc reloc;
22888 val = XVECEXP (x, 0, 0);
22889 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22891 output_addr_const (fp, val);
22896 fputs ("(tlsgd)", fp);
22899 fputs ("(tlsldm)", fp);
22902 fputs ("(tlsldo)", fp);
22905 fputs ("(gottpoff)", fp);
22908 fputs ("(tpoff)", fp);
22911 gcc_unreachable ();
22919 fputs (" + (. - ", fp);
22920 output_addr_const (fp, XVECEXP (x, 0, 2));
22922 output_addr_const (fp, XVECEXP (x, 0, 3));
22932 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22935 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22937 gcc_assert (size == 4);
22938 fputs ("\t.word\t", file);
22939 output_addr_const (file, x);
22940 fputs ("(tlsldo)", file);
22943 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22946 arm_output_addr_const_extra (FILE *fp, rtx x)
22948 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22949 return arm_emit_tls_decoration (fp, x);
22950 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22953 int labelno = INTVAL (XVECEXP (x, 0, 0));
22955 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22956 assemble_name_raw (fp, label);
22960 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22962 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22966 output_addr_const (fp, XVECEXP (x, 0, 0));
22970 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22972 output_addr_const (fp, XVECEXP (x, 0, 0));
22976 output_addr_const (fp, XVECEXP (x, 0, 1));
22980 else if (GET_CODE (x) == CONST_VECTOR)
22981 return arm_emit_vector_const (fp, x);
22986 /* Output assembly for a shift instruction.
22987 SET_FLAGS determines how the instruction modifies the condition codes.
22988 0 - Do not set condition codes.
22989 1 - Set condition codes.
22990 2 - Use smallest instruction. */
22992 arm_output_shift(rtx * operands, int set_flags)
22995 static const char flag_chars[3] = {'?', '.', '!'};
23000 c = flag_chars[set_flags];
23001 if (TARGET_UNIFIED_ASM)
23003 shift = shift_op(operands[3], &val);
23007 operands[2] = GEN_INT(val);
23008 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
23011 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
23014 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
23015 output_asm_insn (pattern, operands);
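/* Illustration (unified syntax, operands chosen arbitrarily): an
   unconditional left shift by two with SET_FLAGS == 1 comes out as
   something like "lsls r0, r1, #2", while SET_FLAGS == 0 gives the
   conditionalizable, non-flag-setting "lsl r0, r1, #2".  */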
23019 /* Output a Thumb-1 casesi dispatch sequence. */
23021 thumb1_output_casesi (rtx *operands)
23023 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
23025 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23027 switch (GET_MODE(diff_vec))
23030 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23031 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23033 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23034 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23036 return "bl\t%___gnu_thumb1_case_si";
23038 gcc_unreachable ();
23042 /* Output a Thumb-2 casesi instruction. */
23044 thumb2_output_casesi (rtx *operands)
23046 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
23048 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23050 output_asm_insn ("cmp\t%0, %1", operands);
23051 output_asm_insn ("bhi\t%l3", operands);
23052 switch (GET_MODE(diff_vec))
23055 return "tbb\t[%|pc, %0]";
23057 return "tbh\t[%|pc, %0, lsl #1]";
23061 output_asm_insn ("adr\t%4, %l2", operands);
23062 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
23063 output_asm_insn ("add\t%4, %4, %5", operands);
23068 output_asm_insn ("adr\t%4, %l2", operands);
23069 return "ldr\t%|pc, [%4, %0, lsl #2]";
23072 gcc_unreachable ();
23076 /* Most ARM cores are single issue, but some newer ones can dual issue.
23077 The scheduler descriptions rely on this being correct. */
23079 arm_issue_rate (void)
23097 /* A table and a function to perform ARM-specific name mangling for
23098 NEON vector types in order to conform to the AAPCS (see "Procedure
23099 Call Standard for the ARM Architecture", Appendix A). To qualify
23100 for emission with the mangled names defined in that document, a
23101 vector type must not only be of the correct mode but also be
23102 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23105 enum machine_mode mode;
23106 const char *element_type_name;
23107 const char *aapcs_name;
23108 } arm_mangle_map_entry;
23110 static arm_mangle_map_entry arm_mangle_map[] = {
23111 /* 64-bit containerized types. */
23112 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23113 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23114 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23115 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23116 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23117 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23118 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23119 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23120 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23121 /* 128-bit containerized types. */
23122 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23123 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23124 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23125 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23126 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23127 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23128 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23129 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23130 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23131 { VOIDmode, NULL, NULL }
23135 arm_mangle_type (const_tree type)
23137 arm_mangle_map_entry *pos = arm_mangle_map;
23139 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23140 has to be mangled as if it is in the "std" namespace. */
23141 if (TARGET_AAPCS_BASED
23142 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23144 static bool warned;
23145 if (!warned && warn_psabi && !in_system_header)
23148 inform (input_location,
23149 "the mangling of %<va_list%> has changed in GCC 4.4");
23151 return "St9__va_list";
23154 /* Half-precision float. */
23155 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23156 return "Dh";
23158 if (TREE_CODE (type) != VECTOR_TYPE)
23161 /* Check the mode of the vector type, and the name of the vector
23162 element type, against the table. */
23163 while (pos->mode != VOIDmode)
23165 tree elt_type = TREE_TYPE (type);
23167 if (pos->mode == TYPE_MODE (type)
23168 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23169 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23170 pos->element_type_name))
23171 return pos->aapcs_name;
23176 /* Use the default mangling for unrecognized (possibly user-defined)
23177 vector types. */
23178 return NULL;
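/* Example mangling under the table above: "int8x8_t" (V8QImode with
   element type __builtin_neon_qi) becomes "15__simd64_int8_t", so a
   hypothetical "void f (int8x8_t)" mangles as "_Z1f15__simd64_int8_t".  */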
23181 /* Order of allocation of core registers for Thumb: this allocation is
23182 written over the corresponding initial entries of the array
23183 initialized with REG_ALLOC_ORDER. We allocate all low registers
23184 first. Saving and restoring a low register is usually cheaper than
23185 using a call-clobbered high register. */
23187 static const int thumb_core_reg_alloc_order[] =
23189 3, 2, 1, 0, 4, 5, 6, 7,
23190 14, 12, 8, 9, 10, 11, 13, 15
23193 /* Adjust register allocation order when compiling for Thumb. */
23196 arm_order_regs_for_local_alloc (void)
23198 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23199 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23201 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23202 sizeof (thumb_core_reg_alloc_order));
23205 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23208 arm_frame_pointer_required (void)
23210 return (cfun->has_nonlocal_label
23211 || SUBTARGET_FRAME_POINTER_REQUIRED
23212 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23215 /* Only thumb1 can't support conditional execution, so return true if
23216 the target is not thumb1. */
23218 arm_have_conditional_execution (void)
23220 return !TARGET_THUMB1;
23223 /* Legitimize a memory reference for sync primitive implemented using
23224 ldrex / strex. We currently force the form of the reference to be
23225 indirect without offset. We do not yet support the indirect offset
23226 addressing supported by some ARM targets for these
23227 instructions. */
23229 arm_legitimize_sync_memory (rtx memory)
23231 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23232 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23234 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23235 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23236 return legitimate_memory;
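/* Sketch: a reference such as (mem (plus (reg r1) (const_int 8))) is
   rewritten here as (mem (reg rN)) after force_reg emits rN = r1 + 8,
   since the ldrex/strex patterns accept only a plain register
   address.  */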
23239 /* An instruction emitter. */
23240 typedef void (* emit_f) (int label, const char *, rtx *);
23242 /* An instruction emitter that emits via the conventional
23243 output_asm_insn. */
23245 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23247 output_asm_insn (pattern, operands);
23250 /* Count the number of emitted synchronization instructions. */
23251 static unsigned arm_insn_count;
23253 /* An emitter that counts emitted instructions but does not actually
23254 emit instructions into the instruction stream. */
23256 arm_count (int label,
23257 const char *pattern ATTRIBUTE_UNUSED,
23258 rtx *operands ATTRIBUTE_UNUSED)
23264 /* Construct a pattern using conventional output formatting and feed
23265 it to output_asm_insn. Provides a mechanism to construct the
23266 output pattern on the fly. Note the hard limit on the pattern
23267 buffer size. */
23268 static void ATTRIBUTE_PRINTF_4
23269 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23270 const char *pattern, ...)
23275 va_start (ap, pattern);
23276 vsprintf (buffer, pattern, ap);
23278 emit (label, buffer, operands);
23281 /* Emit the memory barrier instruction, if any, provided by this
23282 target to a specified emitter. */
23284 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23286 if (TARGET_HAVE_DMB)
23288 /* Note we issue a system level barrier. We should consider
23289 issuing an inner shareability zone barrier here instead, i.e.
23290 "DMB ISH". */
23291 emit (0, "dmb\tsy", operands);
23295 if (TARGET_HAVE_DMB_MCR)
23297 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23301 gcc_unreachable ();
23304 /* Emit the memory barrier instruction, if any, provided by this
23305 target. */
23307 arm_output_memory_barrier (rtx *operands)
23309 arm_process_output_memory_barrier (arm_emit, operands);
23313 /* Helper to figure out the instruction suffix required on ldrex/strex
23314 for operations on an object of the specified mode. */
23315 static const char *
23316 arm_ldrex_suffix (enum machine_mode mode)
23320 case QImode: return "b";
23321 case HImode: return "h";
23322 case SImode: return "";
23323 case DImode: return "d";
23325 gcc_unreachable ();
23330 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23331 mode. */
23333 arm_output_ldrex (emit_f emit,
23334 enum machine_mode mode,
23338 const char *suffix = arm_ldrex_suffix (mode);
23341 operands[0] = target;
23342 operands[1] = memory;
23343 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23346 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23347 mode. */
23349 arm_output_strex (emit_f emit,
23350 enum machine_mode mode,
23356 const char *suffix = arm_ldrex_suffix (mode);
23359 operands[0] = result;
23360 operands[1] = value;
23361 operands[2] = memory;
23362 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23366 /* Helper to emit a two operand instruction. */
23368 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23374 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23377 /* Helper to emit a three operand instruction. */
23379 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23386 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23389 /* Emit a load store exclusive synchronization loop.
23391 do
23392 old_value = [mem]
23393 if old_value != required_value
23394 break;
23395 t1 = sync_op (old_value, new_value)
23396 [mem] = t1, t2 = [0|1]
23397 while ! t2
23399 Note:
23400 t1 == t2 is not permitted
23401 t1 == old_value is permitted
23403 required_value:
23405 RTX register or const_int representing the required old_value for
23406 the modify to continue, if NULL no comparison is performed. */
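/* Concretely (an illustrative SImode fetch-and-add; register numbers
   and label arbitrary), the emitted loop looks like:

	1:	ldrex	r0, [r1]	@ old_value = [mem]
		add	r2, r0, r3	@ t1 = sync_op (old_value, new_value)
		strex	r4, r2, [r1]	@ [mem] = t1, t2 = r4
		teq	r4, #0
		bne	1b  */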
23408 arm_output_sync_loop (emit_f emit,
23409 enum machine_mode mode,
23412 rtx required_value,
23416 enum attr_sync_op sync_op,
23417 int early_barrier_required)
23421 gcc_assert (t1 != t2);
23423 if (early_barrier_required)
23424 arm_process_output_memory_barrier (emit, NULL);
23426 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23428 arm_output_ldrex (emit, mode, old_value, memory);
23430 if (required_value)
23434 operands[0] = old_value;
23435 operands[1] = required_value;
23436 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23437 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23443 arm_output_op3 (emit, "add", t1, old_value, new_value);
23447 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23451 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23455 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23459 arm_output_op3 (emit, "and", t1, old_value, new_value);
23463 arm_output_op3 (emit, "and", t1, old_value, new_value);
23464 arm_output_op2 (emit, "mvn", t1, t1);
23474 arm_output_strex (emit, mode, "", t2, t1, memory);
23476 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23477 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23478 LOCAL_LABEL_PREFIX);
23482 /* Use old_value for the return value because for some operations
23483 the old_value can easily be restored. This saves one register. */
23484 arm_output_strex (emit, mode, "", old_value, t1, memory);
23485 operands[0] = old_value;
23486 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23487 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23488 LOCAL_LABEL_PREFIX);
23493 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23497 arm_output_op3 (emit, "add", old_value, t1, new_value);
23501 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23505 arm_output_op2 (emit, "mov", old_value, required_value);
23509 gcc_unreachable ();
23513 arm_process_output_memory_barrier (emit, NULL);
23514 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23518 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23521 default_value = operands[index - 1];
23523 return default_value;
23526 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23527 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23529 /* Extract the operands for a synchronization instruction from the
23530 instruction's attributes and emit the instruction. */
23532 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23534 rtx result, memory, required_value, new_value, t1, t2;
23536 enum machine_mode mode;
23537 enum attr_sync_op sync_op;
23539 result = FETCH_SYNC_OPERAND(result, 0);
23540 memory = FETCH_SYNC_OPERAND(memory, 0);
23541 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23542 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23543 t1 = FETCH_SYNC_OPERAND(t1, 0);
23544 t2 = FETCH_SYNC_OPERAND(t2, 0);
23545 early_barrier =
23546 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23547 sync_op = get_attr_sync_op (insn);
23548 mode = GET_MODE (memory);
23550 arm_output_sync_loop (emit, mode, result, memory, required_value,
23551 new_value, t1, t2, sync_op, early_barrier);
23554 /* Emit a synchronization instruction loop. */
23556 arm_output_sync_insn (rtx insn, rtx *operands)
23558 arm_process_output_sync_insn (arm_emit, insn, operands);
23562 /* Count the number of machine instructions that will be emitted for a
23563 synchronization instruction. Note that the emitter used does not
23564 emit instructions, it just counts instructions, being careful not
23565 to count labels. */
23567 arm_sync_loop_insns (rtx insn, rtx *operands)
23569 arm_insn_count = 0;
23570 arm_process_output_sync_insn (arm_count, insn, operands);
23571 return arm_insn_count;
23574 /* Helper to call a target sync instruction generator, dealing with
23575 the variation in operands required by the different generators. */
23577 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23578 rtx memory, rtx required_value, rtx new_value)
23580 switch (generator->op)
23582 case arm_sync_generator_omn:
23583 gcc_assert (! required_value);
23584 return generator->u.omn (old_value, memory, new_value);
23586 case arm_sync_generator_omrn:
23587 gcc_assert (required_value);
23588 return generator->u.omrn (old_value, memory, required_value, new_value);
23594 /* Expand a synchronization loop. The synchronization loop is expanded
23595 as an opaque block of instructions in order to ensure that we do
23596 not subsequently get extraneous memory accesses inserted within the
23597 critical region. The exclusive access property of ldrex/strex is
23598 only guaranteed if there are no intervening memory accesses. */
23600 arm_expand_sync (enum machine_mode mode,
23601 struct arm_sync_generator *generator,
23602 rtx target, rtx memory, rtx required_value, rtx new_value)
23604 if (target == NULL)
23605 target = gen_reg_rtx (mode);
23607 memory = arm_legitimize_sync_memory (memory);
23608 if (mode != SImode)
23610 rtx load_temp = gen_reg_rtx (SImode);
23612 if (required_value)
23613 required_value = convert_modes (SImode, mode, required_value, true);
23615 new_value = convert_modes (SImode, mode, new_value, true);
23616 emit_insn (arm_call_generator (generator, load_temp, memory,
23617 required_value, new_value));
23618 emit_move_insn (target, gen_lowpart (mode, load_temp));
23622 emit_insn (arm_call_generator (generator, target, memory, required_value,
23627 static unsigned int
23628 arm_autovectorize_vector_sizes (void)
23630 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
23634 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23636 /* Vectors which aren't in packed structures will not be less aligned than
23637 the natural alignment of their element type, so this is safe. */
23638 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23641 return default_builtin_vector_alignment_reachable (type, is_packed);
23645 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23646 const_tree type, int misalignment,
23647 bool is_packed)
23649 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23651 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23653 if (is_packed)
23654 return align == 1;
23656 /* If the misalignment is unknown, we should be able to handle the access
23657 so long as it is not to a member of a packed data structure. */
23658 if (misalignment == -1)
23661 /* Return true if the misalignment is a multiple of the natural alignment
23662 of the vector's element type. This is probably always going to be
23663 true in practice, since we've already established that this isn't a
23664 packed access. */
23665 return ((misalignment % align) == 0);
23668 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23673 arm_conditional_register_usage (void)
23677 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23679 for (regno = FIRST_FPA_REGNUM;
23680 regno <= LAST_FPA_REGNUM; ++regno)
23681 fixed_regs[regno] = call_used_regs[regno] = 1;
23684 if (TARGET_THUMB1 && optimize_size)
23686 /* When optimizing for size on Thumb-1, it's better not
23687 to use the HI regs, because of the overhead of
23688 stacking them. */
23689 for (regno = FIRST_HI_REGNUM;
23690 regno <= LAST_HI_REGNUM; ++regno)
23691 fixed_regs[regno] = call_used_regs[regno] = 1;
23694 /* The link register can be clobbered by any branch insn,
23695 but we have no way to track that at present, so mark
23696 it as unavailable. */
23698 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23700 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23702 if (TARGET_MAVERICK)
23704 for (regno = FIRST_FPA_REGNUM;
23705 regno <= LAST_FPA_REGNUM; ++ regno)
23706 fixed_regs[regno] = call_used_regs[regno] = 1;
23707 for (regno = FIRST_CIRRUS_FP_REGNUM;
23708 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23710 fixed_regs[regno] = 0;
23711 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23716 /* VFPv3 registers are disabled when earlier VFP
23717 versions are selected due to the definition of
23718 LAST_VFP_REGNUM. */
23719 for (regno = FIRST_VFP_REGNUM;
23720 regno <= LAST_VFP_REGNUM; ++ regno)
23722 fixed_regs[regno] = 0;
23723 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23724 || regno >= FIRST_VFP_REGNUM + 32;
23729 if (TARGET_REALLY_IWMMXT)
23731 regno = FIRST_IWMMXT_GR_REGNUM;
23732 /* The 2002/10/09 revision of the XScale ABI has wCG0
23733 and wCG1 as call-preserved registers. The 2002/11/21
23734 revision changed this so that all wCG registers are
23735 scratch registers. */
23736 for (regno = FIRST_IWMMXT_GR_REGNUM;
23737 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23738 fixed_regs[regno] = 0;
23739 /* The XScale ABI has wR0 - wR9 as scratch registers,
23740 the rest as call-preserved registers. */
23741 for (regno = FIRST_IWMMXT_REGNUM;
23742 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23744 fixed_regs[regno] = 0;
23745 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23749 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23751 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23752 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23754 else if (TARGET_APCS_STACK)
23756 fixed_regs[10] = 1;
23757 call_used_regs[10] = 1;
23759 /* -mcaller-super-interworking reserves r11 for calls to
23760 _interwork_r11_call_via_rN(). Making the register global
23761 is an easy way of ensuring that it remains valid for all
23762 calls. */
23763 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23764 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23766 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23767 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23768 if (TARGET_CALLER_INTERWORKING)
23769 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23771 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23775 arm_preferred_rename_class (reg_class_t rclass)
23777 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23778 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
23779 and code size can be reduced. */
23780 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23786 /* Compute the attribute "length" of insn "*push_multi".
23787 So this function MUST be kept in sync with that insn pattern. */
23789 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
23791 int i, regno, hi_reg;
23792 int num_saves = XVECLEN (parallel_op, 0);
23799 regno = REGNO (first_op);
23800 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23801 for (i = 1; i < num_saves && !hi_reg; i++)
23803 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
23804 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
23807 if (!hi_reg)
23808 return 2;
23809 return 4;
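/* For example, "push {r4, r5, lr}" uses only low registers plus lr and
   can use the 16-bit Thumb-2 encoding (length 2), whereas "push {r4, r8}"
   involves a high register and needs the 32-bit encoding (length 4).  */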
23812 #include "gt-arm.h"