1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
65 void (*arm_lang_output_object_attributes_hook)(void);
72 /* Forward function declarations. */
73 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
74 static int arm_compute_static_chain_stack_bytes (void);
75 static arm_stack_offsets *arm_get_frame_offsets (void);
76 static void arm_add_gc_roots (void);
77 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
78 HOST_WIDE_INT, rtx, rtx, int, int);
79 static unsigned bit_count (unsigned long);
80 static int arm_address_register_rtx_p (rtx, int);
81 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
82 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
83 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
84 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static rtx emit_sfm (int, int);
91 static unsigned arm_size_return_regs (void);
92 static bool arm_assemble_integer (rtx, unsigned int, int);
93 static void arm_print_operand (FILE *, rtx, int);
94 static void arm_print_operand_address (FILE *, rtx);
95 static bool arm_print_operand_punct_valid_p (unsigned char code);
96 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
97 static arm_cc get_arm_condition_code (rtx);
98 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
99 static rtx is_jump_table (rtx);
100 static const char *output_multi_immediate (rtx *, const char *, const char *,
102 static const char *shift_op (rtx, HOST_WIDE_INT *);
103 static struct machine_function *arm_init_machine_status (void);
104 static void thumb_exit (FILE *, int);
105 static rtx is_jump_table (rtx);
106 static HOST_WIDE_INT get_jump_table_size (rtx);
107 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
108 static Mnode *add_minipool_forward_ref (Mfix *);
109 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
110 static Mnode *add_minipool_backward_ref (Mfix *);
111 static void assign_minipool_offsets (Mfix *);
112 static void arm_print_value (FILE *, rtx);
113 static void dump_minipool (rtx);
114 static int arm_barrier_cost (rtx);
115 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
116 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
117 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 static void arm_reorg (void);
120 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
121 static unsigned long arm_compute_save_reg0_reg12_mask (void);
122 static unsigned long arm_compute_save_reg_mask (void);
123 static unsigned long arm_isr_value (tree);
124 static unsigned long arm_compute_func_type (void);
125 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
128 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
129 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
132 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
133 static int arm_comp_type_attributes (const_tree, const_tree);
134 static void arm_set_default_type_attributes (tree);
135 static int arm_adjust_cost (rtx, rtx, rtx, int);
136 static int optimal_immediate_sequence (enum rtx_code code,
137 unsigned HOST_WIDE_INT val,
138 struct four_ints *return_sequence);
139 static int optimal_immediate_sequence_1 (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
141 struct four_ints *return_sequence,
143 static int arm_get_strip_length (int);
144 static bool arm_function_ok_for_sibcall (tree, tree);
145 static enum machine_mode arm_promote_function_mode (const_tree,
146 enum machine_mode, int *,
148 static bool arm_return_in_memory (const_tree, const_tree);
149 static rtx arm_function_value (const_tree, const_tree, bool);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, bool);
166 static bool arm_memory_load_p (rtx);
167 static bool arm_cirrus_insn_p (rtx);
168 static void cirrus_reorg (rtx);
169 static void arm_init_builtins (void);
170 static void arm_init_iwmmxt_builtins (void);
171 static rtx safe_vector_operand (rtx, enum machine_mode);
172 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
173 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
174 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
175 static tree arm_builtin_decl (unsigned, bool);
176 static void emit_constant_insn (rtx cond, rtx pattern);
177 static rtx emit_set_insn (rtx, rtx);
178 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
180 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
182 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
184 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
185 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
187 static int aapcs_select_return_coproc (const_tree, const_tree);
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
194 static void arm_encode_section_info (tree, rtx, int);
197 static void arm_file_end (void);
198 static void arm_file_start (void);
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
216 static rtx arm_dwarf_register_span (rtx);
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static const char *arm_invalid_parameter_type (const_tree t);
240 static const char *arm_invalid_return_type (const_tree t);
241 static tree arm_promoted_type (const_tree t);
242 static tree arm_convert_to_type (tree type, tree expr);
243 static bool arm_scalar_mode_supported_p (enum machine_mode);
244 static bool arm_frame_pointer_required (void);
245 static bool arm_can_eliminate (const int, const int);
246 static void arm_asm_trampoline_template (FILE *);
247 static void arm_trampoline_init (rtx, tree, rtx);
248 static rtx arm_trampoline_adjust_address (rtx);
249 static rtx arm_pic_static_addr (rtx orig, rtx reg);
250 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
251 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool arm_array_mode_supported_p (enum machine_mode,
254 unsigned HOST_WIDE_INT);
255 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
256 static bool arm_class_likely_spilled_p (reg_class_t);
257 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
258 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
262 static void arm_conditional_register_usage (void);
263 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
264 static unsigned int arm_autovectorize_vector_sizes (void);
265 static int arm_default_branch_cost (bool, bool);
266 static int arm_cortex_a5_branch_cost (bool, bool);
269 /* Table of machine attributes. */
270 static const struct attribute_spec arm_attribute_table[] =
272 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
273 affects_type_identity } */
274 /* Function calls made to this symbol must be done indirectly, because
275 it may lie outside of the 26 bit addressing range of a normal function
277 { "long_call", 0, 0, false, true, true, NULL, false },
278 /* Whereas these functions are always known to reside within the 26 bit
280 { "short_call", 0, 0, false, true, true, NULL, false },
281 /* Specify the procedure call conventions for a function. */
282 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
284 /* Interrupt Service Routines have special prologue and epilogue requirements. */
285 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
287 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
289 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
292 /* ARM/PE has three new attributes:
294 dllexport - for exporting a function/variable that will live in a dll
295 dllimport - for importing a function/variable from a dll
297 Microsoft allows multiple declspecs in one __declspec, separating
298 them with spaces. We do NOT support this. Instead, use __declspec
301 { "dllimport", 0, 0, true, false, false, NULL, false },
302 { "dllexport", 0, 0, true, false, false, NULL, false },
303 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
305 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
306 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
307 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
308 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
311 { NULL, 0, 0, false, false, false, NULL, false }
314 /* Initialize the GCC target structure. */
315 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
316 #undef TARGET_MERGE_DECL_ATTRIBUTES
317 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
320 #undef TARGET_LEGITIMIZE_ADDRESS
321 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
323 #undef TARGET_ATTRIBUTE_TABLE
324 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
326 #undef TARGET_ASM_FILE_START
327 #define TARGET_ASM_FILE_START arm_file_start
328 #undef TARGET_ASM_FILE_END
329 #define TARGET_ASM_FILE_END arm_file_end
331 #undef TARGET_ASM_ALIGNED_SI_OP
332 #define TARGET_ASM_ALIGNED_SI_OP NULL
333 #undef TARGET_ASM_INTEGER
334 #define TARGET_ASM_INTEGER arm_assemble_integer
336 #undef TARGET_PRINT_OPERAND
337 #define TARGET_PRINT_OPERAND arm_print_operand
338 #undef TARGET_PRINT_OPERAND_ADDRESS
339 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
340 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
341 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
343 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
344 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
346 #undef TARGET_ASM_FUNCTION_PROLOGUE
347 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
349 #undef TARGET_ASM_FUNCTION_EPILOGUE
350 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
352 #undef TARGET_OPTION_OVERRIDE
353 #define TARGET_OPTION_OVERRIDE arm_option_override
355 #undef TARGET_COMP_TYPE_ATTRIBUTES
356 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
358 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
359 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
361 #undef TARGET_SCHED_ADJUST_COST
362 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
364 #undef TARGET_ENCODE_SECTION_INFO
366 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
368 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
371 #undef TARGET_STRIP_NAME_ENCODING
372 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
374 #undef TARGET_ASM_INTERNAL_LABEL
375 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
377 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
378 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
380 #undef TARGET_FUNCTION_VALUE
381 #define TARGET_FUNCTION_VALUE arm_function_value
383 #undef TARGET_LIBCALL_VALUE
384 #define TARGET_LIBCALL_VALUE arm_libcall_value
386 #undef TARGET_ASM_OUTPUT_MI_THUNK
387 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
388 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
389 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
391 #undef TARGET_RTX_COSTS
392 #define TARGET_RTX_COSTS arm_rtx_costs
393 #undef TARGET_ADDRESS_COST
394 #define TARGET_ADDRESS_COST arm_address_cost
396 #undef TARGET_SHIFT_TRUNCATION_MASK
397 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
398 #undef TARGET_VECTOR_MODE_SUPPORTED_P
399 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
400 #undef TARGET_ARRAY_MODE_SUPPORTED_P
401 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
402 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
403 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
404 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
405 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
406 arm_autovectorize_vector_sizes
408 #undef TARGET_MACHINE_DEPENDENT_REORG
409 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
411 #undef TARGET_INIT_BUILTINS
412 #define TARGET_INIT_BUILTINS arm_init_builtins
413 #undef TARGET_EXPAND_BUILTIN
414 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
415 #undef TARGET_BUILTIN_DECL
416 #define TARGET_BUILTIN_DECL arm_builtin_decl
418 #undef TARGET_INIT_LIBFUNCS
419 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
421 #undef TARGET_PROMOTE_FUNCTION_MODE
422 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
423 #undef TARGET_PROMOTE_PROTOTYPES
424 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
425 #undef TARGET_PASS_BY_REFERENCE
426 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
427 #undef TARGET_ARG_PARTIAL_BYTES
428 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
429 #undef TARGET_FUNCTION_ARG
430 #define TARGET_FUNCTION_ARG arm_function_arg
431 #undef TARGET_FUNCTION_ARG_ADVANCE
432 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
433 #undef TARGET_FUNCTION_ARG_BOUNDARY
434 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
436 #undef TARGET_SETUP_INCOMING_VARARGS
437 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
439 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
440 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
442 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
443 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
444 #undef TARGET_TRAMPOLINE_INIT
445 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
446 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
447 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
449 #undef TARGET_DEFAULT_SHORT_ENUMS
450 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
452 #undef TARGET_ALIGN_ANON_BITFIELD
453 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
455 #undef TARGET_NARROW_VOLATILE_BITFIELD
456 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
458 #undef TARGET_CXX_GUARD_TYPE
459 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
461 #undef TARGET_CXX_GUARD_MASK_BIT
462 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
464 #undef TARGET_CXX_GET_COOKIE_SIZE
465 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
467 #undef TARGET_CXX_COOKIE_HAS_SIZE
468 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
470 #undef TARGET_CXX_CDTOR_RETURNS_THIS
471 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
473 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
474 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
476 #undef TARGET_CXX_USE_AEABI_ATEXIT
477 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
479 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
480 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
481 arm_cxx_determine_class_data_visibility
483 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
484 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
486 #undef TARGET_RETURN_IN_MSB
487 #define TARGET_RETURN_IN_MSB arm_return_in_msb
489 #undef TARGET_RETURN_IN_MEMORY
490 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
492 #undef TARGET_MUST_PASS_IN_STACK
493 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
496 #undef TARGET_ASM_UNWIND_EMIT
497 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
499 /* EABI unwinding tables use a different format for the typeinfo tables. */
500 #undef TARGET_ASM_TTYPE
501 #define TARGET_ASM_TTYPE arm_output_ttype
503 #undef TARGET_ARM_EABI_UNWINDER
504 #define TARGET_ARM_EABI_UNWINDER true
506 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
507 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
509 #undef TARGET_ASM_INIT_SECTIONS
510 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
511 #endif /* ARM_UNWIND_INFO */
513 #undef TARGET_DWARF_REGISTER_SPAN
514 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
516 #undef TARGET_CANNOT_COPY_INSN_P
517 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
520 #undef TARGET_HAVE_TLS
521 #define TARGET_HAVE_TLS true
524 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
525 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
527 #undef TARGET_LEGITIMATE_CONSTANT_P
528 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
530 #undef TARGET_CANNOT_FORCE_CONST_MEM
531 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
533 #undef TARGET_MAX_ANCHOR_OFFSET
534 #define TARGET_MAX_ANCHOR_OFFSET 4095
536 /* The minimum is set such that the total size of the block
537 for a particular anchor is -4088 + 1 + 4095 bytes, which is
538 divisible by eight, ensuring natural spacing of anchors. */
539 #undef TARGET_MIN_ANCHOR_OFFSET
540 #define TARGET_MIN_ANCHOR_OFFSET -4088
542 #undef TARGET_SCHED_ISSUE_RATE
543 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
545 #undef TARGET_MANGLE_TYPE
546 #define TARGET_MANGLE_TYPE arm_mangle_type
548 #undef TARGET_BUILD_BUILTIN_VA_LIST
549 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
550 #undef TARGET_EXPAND_BUILTIN_VA_START
551 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
552 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
553 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
556 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
557 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
560 #undef TARGET_LEGITIMATE_ADDRESS_P
561 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
563 #undef TARGET_INVALID_PARAMETER_TYPE
564 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
566 #undef TARGET_INVALID_RETURN_TYPE
567 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
569 #undef TARGET_PROMOTED_TYPE
570 #define TARGET_PROMOTED_TYPE arm_promoted_type
572 #undef TARGET_CONVERT_TO_TYPE
573 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
575 #undef TARGET_SCALAR_MODE_SUPPORTED_P
576 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
578 #undef TARGET_FRAME_POINTER_REQUIRED
579 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
581 #undef TARGET_CAN_ELIMINATE
582 #define TARGET_CAN_ELIMINATE arm_can_eliminate
584 #undef TARGET_CONDITIONAL_REGISTER_USAGE
585 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
587 #undef TARGET_CLASS_LIKELY_SPILLED_P
588 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
590 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
591 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
592 arm_vector_alignment_reachable
594 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
595 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
596 arm_builtin_support_vector_misalignment
598 #undef TARGET_PREFERRED_RENAME_CLASS
599 #define TARGET_PREFERRED_RENAME_CLASS \
600 arm_preferred_rename_class
602 struct gcc_target targetm = TARGET_INITIALIZER;
604 /* Obstack for minipool constant handling. */
605 static struct obstack minipool_obstack;
606 static char * minipool_startobj;
608 /* The maximum number of insns skipped which
609 will be conditionalised if possible. */
610 static int max_insns_skipped = 5;
612 extern FILE * asm_out_file;
614 /* True if we are currently building a constant table. */
615 int making_const_table;
617 /* The processor for which instructions should be scheduled. */
618 enum processor_type arm_tune = arm_none;
620 /* The current tuning set. */
621 const struct tune_params *current_tune;
623 /* Which floating point hardware to schedule for. */
626 /* Which floating popint hardware to use. */
627 const struct arm_fpu_desc *arm_fpu_desc;
629 /* Used for Thumb call_via trampolines. */
630 rtx thumb_call_via_label[14];
631 static int thumb_call_reg_needed;
633 /* Bit values used to identify processor capabilities. */
634 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
635 #define FL_ARCH3M (1 << 1) /* Extended multiply */
636 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
637 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
638 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
639 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
640 #define FL_THUMB (1 << 6) /* Thumb aware */
641 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
642 #define FL_STRONG (1 << 8) /* StrongARM */
643 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
644 #define FL_XSCALE (1 << 10) /* XScale */
645 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
646 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
647 media instructions. */
648 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
649 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
650 Note: ARM6 & 7 derivatives only. */
651 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
652 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
653 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
655 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
656 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
657 #define FL_NEON (1 << 20) /* Neon instructions. */
658 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
660 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
661 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
663 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
665 /* Flags that only effect tuning, not available instructions. */
666 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
669 #define FL_FOR_ARCH2 FL_NOTM
670 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
671 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
672 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
673 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
674 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
675 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
676 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
677 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
678 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
679 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
680 #define FL_FOR_ARCH6J FL_FOR_ARCH6
681 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
682 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
683 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
684 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
685 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
686 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
687 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
688 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
689 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
690 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
692 /* The bits in this mask specify which
693 instructions we are allowed to generate. */
694 static unsigned long insn_flags = 0;
696 /* The bits in this mask specify which instruction scheduling options should
698 static unsigned long tune_flags = 0;
700 /* The following are used in the arm.md file as equivalents to bits
701 in the above two flag variables. */
703 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
706 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
709 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
712 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
715 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
718 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
721 /* Nonzero if this chip supports the ARM 6K extensions. */
724 /* Nonzero if this chip supports the ARM 7 extensions. */
727 /* Nonzero if instructions not present in the 'M' profile can be used. */
728 int arm_arch_notm = 0;
730 /* Nonzero if instructions present in ARMv7E-M can be used. */
733 /* Nonzero if this chip can benefit from load scheduling. */
734 int arm_ld_sched = 0;
736 /* Nonzero if this chip is a StrongARM. */
737 int arm_tune_strongarm = 0;
739 /* Nonzero if this chip is a Cirrus variant. */
740 int arm_arch_cirrus = 0;
742 /* Nonzero if this chip supports Intel Wireless MMX technology. */
743 int arm_arch_iwmmxt = 0;
745 /* Nonzero if this chip is an XScale. */
746 int arm_arch_xscale = 0;
748 /* Nonzero if tuning for XScale */
749 int arm_tune_xscale = 0;
751 /* Nonzero if we want to tune for stores that access the write-buffer.
752 This typically means an ARM6 or ARM7 with MMU or MPU. */
753 int arm_tune_wbuf = 0;
755 /* Nonzero if tuning for Cortex-A9. */
756 int arm_tune_cortex_a9 = 0;
758 /* Nonzero if generating Thumb instructions. */
761 /* Nonzero if generating Thumb-1 instructions. */
764 /* Nonzero if we should define __THUMB_INTERWORK__ in the
766 XXX This is a bit of a hack, it's intended to help work around
767 problems in GLD which doesn't understand that armv5t code is
768 interworking clean. */
769 int arm_cpp_interwork = 0;
771 /* Nonzero if chip supports Thumb 2. */
774 /* Nonzero if chip supports integer division instruction. */
775 int arm_arch_arm_hwdiv;
776 int arm_arch_thumb_hwdiv;
778 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
779 we must report the mode of the memory reference from
780 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
781 enum machine_mode output_memory_reference_mode;
783 /* The register number to be used for the PIC offset register. */
784 unsigned arm_pic_register = INVALID_REGNUM;
786 /* Set to 1 after arm_reorg has started. Reset to start at the start of
787 the next function. */
788 static int after_arm_reorg = 0;
790 enum arm_pcs arm_pcs_default;
792 /* For an explanation of these variables, see final_prescan_insn below. */
794 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
795 enum arm_cond_code arm_current_cc;
798 int arm_target_label;
799 /* The number of conditionally executed insns, including the current insn. */
800 int arm_condexec_count = 0;
801 /* A bitmask specifying the patterns for the IT block.
802 Zero means do not output an IT block before this insn. */
803 int arm_condexec_mask = 0;
804 /* The number of bits used in arm_condexec_mask. */
805 int arm_condexec_masklen = 0;
807 /* The condition codes of the ARM, and the inverse function. */
808 static const char * const arm_condition_codes[] =
810 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
811 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
814 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
815 int arm_regs_in_sequence[] =
817 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
820 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
821 #define streq(string1, string2) (strcmp (string1, string2) == 0)
823 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
824 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
825 | (1 << PIC_OFFSET_TABLE_REGNUM)))
827 /* Initialization code. */
831 const char *const name;
832 enum processor_type core;
834 const unsigned long flags;
835 const struct tune_params *const tune;
839 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
840 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
845 const struct tune_params arm_slowmul_tune =
847 arm_slowmul_rtx_costs,
849 3, /* Constant limit. */
850 5, /* Max cond insns. */
851 ARM_PREFETCH_NOT_BENEFICIAL,
852 true, /* Prefer constant pool. */
853 arm_default_branch_cost
856 const struct tune_params arm_fastmul_tune =
858 arm_fastmul_rtx_costs,
860 1, /* Constant limit. */
861 5, /* Max cond insns. */
862 ARM_PREFETCH_NOT_BENEFICIAL,
863 true, /* Prefer constant pool. */
864 arm_default_branch_cost
867 /* StrongARM has early execution of branches, so a sequence that is worth
868 skipping is shorter. Set max_insns_skipped to a lower value. */
870 const struct tune_params arm_strongarm_tune =
872 arm_fastmul_rtx_costs,
874 1, /* Constant limit. */
875 3, /* Max cond insns. */
876 ARM_PREFETCH_NOT_BENEFICIAL,
877 true, /* Prefer constant pool. */
878 arm_default_branch_cost
881 const struct tune_params arm_xscale_tune =
883 arm_xscale_rtx_costs,
884 xscale_sched_adjust_cost,
885 2, /* Constant limit. */
886 3, /* Max cond insns. */
887 ARM_PREFETCH_NOT_BENEFICIAL,
888 true, /* Prefer constant pool. */
889 arm_default_branch_cost
892 const struct tune_params arm_9e_tune =
896 1, /* Constant limit. */
897 5, /* Max cond insns. */
898 ARM_PREFETCH_NOT_BENEFICIAL,
899 true, /* Prefer constant pool. */
900 arm_default_branch_cost
903 const struct tune_params arm_v6t2_tune =
907 1, /* Constant limit. */
908 5, /* Max cond insns. */
909 ARM_PREFETCH_NOT_BENEFICIAL,
910 false, /* Prefer constant pool. */
911 arm_default_branch_cost
914 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
915 const struct tune_params arm_cortex_tune =
919 1, /* Constant limit. */
920 5, /* Max cond insns. */
921 ARM_PREFETCH_NOT_BENEFICIAL,
922 false, /* Prefer constant pool. */
923 arm_default_branch_cost
926 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
927 less appealing. Set max_insns_skipped to a low value. */
929 const struct tune_params arm_cortex_a5_tune =
933 1, /* Constant limit. */
934 1, /* Max cond insns. */
935 ARM_PREFETCH_NOT_BENEFICIAL,
936 false, /* Prefer constant pool. */
937 arm_cortex_a5_branch_cost
940 const struct tune_params arm_cortex_a9_tune =
943 cortex_a9_sched_adjust_cost,
944 1, /* Constant limit. */
945 5, /* Max cond insns. */
946 ARM_PREFETCH_BENEFICIAL(4,32,32),
947 false, /* Prefer constant pool. */
948 arm_default_branch_cost
951 const struct tune_params arm_fa726te_tune =
954 fa726te_sched_adjust_cost,
955 1, /* Constant limit. */
956 5, /* Max cond insns. */
957 ARM_PREFETCH_NOT_BENEFICIAL,
958 true, /* Prefer constant pool. */
959 arm_default_branch_cost
963 /* Not all of these give usefully different compilation alternatives,
964 but there is no simple way of generalizing them. */
965 static const struct processors all_cores[] =
968 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
969 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
970 #include "arm-cores.def"
972 {NULL, arm_none, NULL, 0, NULL}
975 static const struct processors all_architectures[] =
977 /* ARM Architectures */
978 /* We don't specify tuning costs here as it will be figured out
981 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
982 {NAME, CORE, #ARCH, FLAGS, NULL},
983 #include "arm-arches.def"
985 {NULL, arm_none, NULL, 0 , NULL}
989 /* These are populated as commandline arguments are processed, or NULL
991 static const struct processors *arm_selected_arch;
992 static const struct processors *arm_selected_cpu;
993 static const struct processors *arm_selected_tune;
995 /* The name of the preprocessor macro to define for this architecture. */
997 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
999 /* Available values for -mfpu=. */
1001 static const struct arm_fpu_desc all_fpus[] =
1003 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1004 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1005 #include "arm-fpus.def"
1010 /* Supported TLS relocations. */
1018 TLS_DESCSEQ /* GNU scheme */
1021 /* The maximum number of insns to be used when loading a constant. */
1023 arm_constant_limit (bool size_p)
1025 return size_p ? 1 : current_tune->constant_limit;
1028 /* Emit an insn that's a simple single-set. Both the operands must be known
1031 emit_set_insn (rtx x, rtx y)
1033 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1036 /* Return the number of bits set in VALUE. */
1038 bit_count (unsigned long value)
1040 unsigned long count = 0;
1045 value &= value - 1; /* Clear the least-significant set bit. */
1053 enum machine_mode mode;
1055 } arm_fixed_mode_set;
1057 /* A small helper for setting fixed-point library libfuncs. */
1060 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1061 const char *funcname, const char *modename,
1066 if (num_suffix == 0)
1067 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1069 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1071 set_optab_libfunc (optable, mode, buffer);
1075 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1076 enum machine_mode from, const char *funcname,
1077 const char *toname, const char *fromname)
1080 const char *maybe_suffix_2 = "";
1082 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1083 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1084 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1085 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1086 maybe_suffix_2 = "2";
1088 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1091 set_conv_libfunc (optable, to, from, buffer);
1094 /* Set up library functions unique to ARM. */
1097 arm_init_libfuncs (void)
1099 /* There are no special library functions unless we are using the
1104 /* The functions below are described in Section 4 of the "Run-Time
1105 ABI for the ARM architecture", Version 1.0. */
1107 /* Double-precision floating-point arithmetic. Table 2. */
1108 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1109 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1110 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1111 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1112 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1114 /* Double-precision comparisons. Table 3. */
1115 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1116 set_optab_libfunc (ne_optab, DFmode, NULL);
1117 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1118 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1119 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1120 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1121 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1123 /* Single-precision floating-point arithmetic. Table 4. */
1124 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1125 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1126 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1127 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1128 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1130 /* Single-precision comparisons. Table 5. */
1131 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1132 set_optab_libfunc (ne_optab, SFmode, NULL);
1133 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1134 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1135 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1136 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1137 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1139 /* Floating-point to integer conversions. Table 6. */
1140 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1141 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1142 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1143 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1144 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1145 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1146 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1147 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1149 /* Conversions between floating types. Table 7. */
1150 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1151 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1153 /* Integer to floating-point conversions. Table 8. */
1154 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1155 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1156 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1157 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1158 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1159 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1160 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1161 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1163 /* Long long. Table 9. */
1164 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1165 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1166 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1167 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1168 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1169 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1170 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1171 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1173 /* Integer (32/32->32) division. \S 4.3.1. */
1174 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1175 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1177 /* The divmod functions are designed so that they can be used for
1178 plain division, even though they return both the quotient and the
1179 remainder. The quotient is returned in the usual location (i.e.,
1180 r0 for SImode, {r0, r1} for DImode), just as would be expected
1181 for an ordinary division routine. Because the AAPCS calling
1182 conventions specify that all of { r0, r1, r2, r3 } are
1183 callee-saved registers, there is no need to tell the compiler
1184 explicitly that those registers are clobbered by these
1186 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1187 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1189 /* For SImode division the ABI provides div-without-mod routines,
1190 which are faster. */
1191 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1192 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1194 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1195 divmod libcalls instead. */
1196 set_optab_libfunc (smod_optab, DImode, NULL);
1197 set_optab_libfunc (umod_optab, DImode, NULL);
1198 set_optab_libfunc (smod_optab, SImode, NULL);
1199 set_optab_libfunc (umod_optab, SImode, NULL);
1201 /* Half-precision float operations. The compiler handles all operations
1202 with NULL libfuncs by converting the SFmode. */
1203 switch (arm_fp16_format)
1205 case ARM_FP16_FORMAT_IEEE:
1206 case ARM_FP16_FORMAT_ALTERNATIVE:
1209 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1210 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1212 : "__gnu_f2h_alternative"));
1213 set_conv_libfunc (sext_optab, SFmode, HFmode,
1214 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1216 : "__gnu_h2f_alternative"));
1219 set_optab_libfunc (add_optab, HFmode, NULL);
1220 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1221 set_optab_libfunc (smul_optab, HFmode, NULL);
1222 set_optab_libfunc (neg_optab, HFmode, NULL);
1223 set_optab_libfunc (sub_optab, HFmode, NULL);
1226 set_optab_libfunc (eq_optab, HFmode, NULL);
1227 set_optab_libfunc (ne_optab, HFmode, NULL);
1228 set_optab_libfunc (lt_optab, HFmode, NULL);
1229 set_optab_libfunc (le_optab, HFmode, NULL);
1230 set_optab_libfunc (ge_optab, HFmode, NULL);
1231 set_optab_libfunc (gt_optab, HFmode, NULL);
1232 set_optab_libfunc (unord_optab, HFmode, NULL);
1239 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1241 const arm_fixed_mode_set fixed_arith_modes[] =
1262 const arm_fixed_mode_set fixed_conv_modes[] =
1292 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1294 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1295 "add", fixed_arith_modes[i].name, 3);
1296 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1297 "ssadd", fixed_arith_modes[i].name, 3);
1298 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1299 "usadd", fixed_arith_modes[i].name, 3);
1300 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1301 "sub", fixed_arith_modes[i].name, 3);
1302 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1303 "sssub", fixed_arith_modes[i].name, 3);
1304 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1305 "ussub", fixed_arith_modes[i].name, 3);
1306 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1307 "mul", fixed_arith_modes[i].name, 3);
1308 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1309 "ssmul", fixed_arith_modes[i].name, 3);
1310 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1311 "usmul", fixed_arith_modes[i].name, 3);
1312 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1313 "div", fixed_arith_modes[i].name, 3);
1314 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1315 "udiv", fixed_arith_modes[i].name, 3);
1316 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1317 "ssdiv", fixed_arith_modes[i].name, 3);
1318 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1319 "usdiv", fixed_arith_modes[i].name, 3);
1320 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1321 "neg", fixed_arith_modes[i].name, 2);
1322 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1323 "ssneg", fixed_arith_modes[i].name, 2);
1324 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1325 "usneg", fixed_arith_modes[i].name, 2);
1326 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1327 "ashl", fixed_arith_modes[i].name, 3);
1328 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1329 "ashr", fixed_arith_modes[i].name, 3);
1330 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1331 "lshr", fixed_arith_modes[i].name, 3);
1332 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1333 "ssashl", fixed_arith_modes[i].name, 3);
1334 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1335 "usashl", fixed_arith_modes[i].name, 3);
1336 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1337 "cmp", fixed_arith_modes[i].name, 2);
1340 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1341 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1344 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1345 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1348 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1349 fixed_conv_modes[j].mode, "fract",
1350 fixed_conv_modes[i].name,
1351 fixed_conv_modes[j].name);
1352 arm_set_fixed_conv_libfunc (satfract_optab,
1353 fixed_conv_modes[i].mode,
1354 fixed_conv_modes[j].mode, "satfract",
1355 fixed_conv_modes[i].name,
1356 fixed_conv_modes[j].name);
1357 arm_set_fixed_conv_libfunc (fractuns_optab,
1358 fixed_conv_modes[i].mode,
1359 fixed_conv_modes[j].mode, "fractuns",
1360 fixed_conv_modes[i].name,
1361 fixed_conv_modes[j].name);
1362 arm_set_fixed_conv_libfunc (satfractuns_optab,
1363 fixed_conv_modes[i].mode,
1364 fixed_conv_modes[j].mode, "satfractuns",
1365 fixed_conv_modes[i].name,
1366 fixed_conv_modes[j].name);
1370 if (TARGET_AAPCS_BASED)
1371 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1374 /* On AAPCS systems, this is the "struct __va_list". */
1375 static GTY(()) tree va_list_type;
1377 /* Return the type to use as __builtin_va_list. */
1379 arm_build_builtin_va_list (void)
1384 if (!TARGET_AAPCS_BASED)
1385 return std_build_builtin_va_list ();
1387 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1395 The C Library ABI further reinforces this definition in \S
1398 We must follow this definition exactly. The structure tag
1399 name is visible in C++ mangled names, and thus forms a part
1400 of the ABI. The field name may be used by people who
1401 #include <stdarg.h>. */
1402 /* Create the type. */
1403 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1404 /* Give it the required name. */
1405 va_list_name = build_decl (BUILTINS_LOCATION,
1407 get_identifier ("__va_list"),
1409 DECL_ARTIFICIAL (va_list_name) = 1;
1410 TYPE_NAME (va_list_type) = va_list_name;
1411 TYPE_STUB_DECL (va_list_type) = va_list_name;
1412 /* Create the __ap field. */
1413 ap_field = build_decl (BUILTINS_LOCATION,
1415 get_identifier ("__ap"),
1417 DECL_ARTIFICIAL (ap_field) = 1;
1418 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1419 TYPE_FIELDS (va_list_type) = ap_field;
1420 /* Compute its layout. */
1421 layout_type (va_list_type);
1423 return va_list_type;
1426 /* Return an expression of type "void *" pointing to the next
1427 available argument in a variable-argument list. VALIST is the
1428 user-level va_list object, of type __builtin_va_list. */
1430 arm_extract_valist_ptr (tree valist)
1432 if (TREE_TYPE (valist) == error_mark_node)
1433 return error_mark_node;
1435 /* On an AAPCS target, the pointer is stored within "struct
1437 if (TARGET_AAPCS_BASED)
1439 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1440 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1441 valist, ap_field, NULL_TREE);
1447 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1449 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1451 valist = arm_extract_valist_ptr (valist);
1452 std_expand_builtin_va_start (valist, nextarg);
1455 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1457 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1460 valist = arm_extract_valist_ptr (valist);
1461 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1464 /* Fix up any incompatible options that the user has specified. */
1466 arm_option_override (void)
1468 if (global_options_set.x_arm_arch_option)
1469 arm_selected_arch = &all_architectures[arm_arch_option];
1471 if (global_options_set.x_arm_cpu_option)
1472 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1474 if (global_options_set.x_arm_tune_option)
1475 arm_selected_tune = &all_cores[(int) arm_tune_option];
1477 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1478 SUBTARGET_OVERRIDE_OPTIONS;
1481 if (arm_selected_arch)
1483 if (arm_selected_cpu)
1485 /* Check for conflict between mcpu and march. */
1486 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1488 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1489 arm_selected_cpu->name, arm_selected_arch->name);
1490 /* -march wins for code generation.
1491 -mcpu wins for default tuning. */
1492 if (!arm_selected_tune)
1493 arm_selected_tune = arm_selected_cpu;
1495 arm_selected_cpu = arm_selected_arch;
1499 arm_selected_arch = NULL;
1502 /* Pick a CPU based on the architecture. */
1503 arm_selected_cpu = arm_selected_arch;
1506 /* If the user did not specify a processor, choose one for them. */
1507 if (!arm_selected_cpu)
1509 const struct processors * sel;
1510 unsigned int sought;
1512 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1513 if (!arm_selected_cpu->name)
1515 #ifdef SUBTARGET_CPU_DEFAULT
1516 /* Use the subtarget default CPU if none was specified by
1518 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1520 /* Default to ARM6. */
1521 if (!arm_selected_cpu->name)
1522 arm_selected_cpu = &all_cores[arm6];
1525 sel = arm_selected_cpu;
1526 insn_flags = sel->flags;
1528 /* Now check to see if the user has specified some command line
1529 switch that require certain abilities from the cpu. */
1532 if (TARGET_INTERWORK || TARGET_THUMB)
1534 sought |= (FL_THUMB | FL_MODE32);
1536 /* There are no ARM processors that support both APCS-26 and
1537 interworking. Therefore we force FL_MODE26 to be removed
1538 from insn_flags here (if it was set), so that the search
1539 below will always be able to find a compatible processor. */
1540 insn_flags &= ~FL_MODE26;
1543 if (sought != 0 && ((sought & insn_flags) != sought))
1545 /* Try to locate a CPU type that supports all of the abilities
1546 of the default CPU, plus the extra abilities requested by
1548 for (sel = all_cores; sel->name != NULL; sel++)
1549 if ((sel->flags & sought) == (sought | insn_flags))
1552 if (sel->name == NULL)
1554 unsigned current_bit_count = 0;
1555 const struct processors * best_fit = NULL;
1557 /* Ideally we would like to issue an error message here
1558 saying that it was not possible to find a CPU compatible
1559 with the default CPU, but which also supports the command
1560 line options specified by the programmer, and so they
1561 ought to use the -mcpu=<name> command line option to
1562 override the default CPU type.
1564 If we cannot find a cpu that has both the
1565 characteristics of the default cpu and the given
1566 command line options we scan the array again looking
1567 for a best match. */
1568 for (sel = all_cores; sel->name != NULL; sel++)
1569 if ((sel->flags & sought) == sought)
1573 count = bit_count (sel->flags & insn_flags);
1575 if (count >= current_bit_count)
1578 current_bit_count = count;
1582 gcc_assert (best_fit);
1586 arm_selected_cpu = sel;
1590 gcc_assert (arm_selected_cpu);
1591 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1592 if (!arm_selected_tune)
1593 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1595 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1596 insn_flags = arm_selected_cpu->flags;
1598 arm_tune = arm_selected_tune->core;
1599 tune_flags = arm_selected_tune->flags;
1600 current_tune = arm_selected_tune->tune;
1602 /* Make sure that the processor choice does not conflict with any of the
1603 other command line choices. */
1604 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1605 error ("target CPU does not support ARM mode");
1607 /* BPABI targets use linker tricks to allow interworking on cores
1608 without thumb support. */
1609 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1611 warning (0, "target CPU does not support interworking" );
1612 target_flags &= ~MASK_INTERWORK;
1615 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1617 warning (0, "target CPU does not support THUMB instructions");
1618 target_flags &= ~MASK_THUMB;
1621 if (TARGET_APCS_FRAME && TARGET_THUMB)
1623 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1624 target_flags &= ~MASK_APCS_FRAME;
1627 /* Callee super interworking implies thumb interworking. Adding
1628 this to the flags here simplifies the logic elsewhere. */
1629 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1630 target_flags |= MASK_INTERWORK;
1632 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1633 from here where no function is being compiled currently. */
1634 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1635 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1637 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1638 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1640 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1642 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1643 target_flags |= MASK_APCS_FRAME;
1646 if (TARGET_POKE_FUNCTION_NAME)
1647 target_flags |= MASK_APCS_FRAME;
1649 if (TARGET_APCS_REENT && flag_pic)
1650 error ("-fpic and -mapcs-reent are incompatible");
1652 if (TARGET_APCS_REENT)
1653 warning (0, "APCS reentrant code not supported. Ignored");
1655 /* If this target is normally configured to use APCS frames, warn if they
1656 are turned off and debugging is turned on. */
1658 && write_symbols != NO_DEBUG
1659 && !TARGET_APCS_FRAME
1660 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1661 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1663 if (TARGET_APCS_FLOAT)
1664 warning (0, "passing floating point arguments in fp regs not yet supported");
1666 if (TARGET_LITTLE_WORDS)
1667 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1668 "will be removed in a future release");
1670 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1671 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1672 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1673 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1674 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1675 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1676 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1677 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1678 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1679 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1680 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1681 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1682 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1683 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1685 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1686 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1687 thumb_code = TARGET_ARM == 0;
1688 thumb1_code = TARGET_THUMB1 != 0;
1689 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1690 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1691 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1692 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1693 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1694 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1696 /* If we are not using the default (ARM mode) section anchor offset
1697 ranges, then set the correct ranges now. */
1700 /* Thumb-1 LDR instructions cannot have negative offsets.
1701 Permissible positive offset ranges are 5-bit (for byte loads),
1702 6-bit (for halfword loads), or 7-bit (for word loads).
1703 Empirical results suggest a 7-bit anchor range gives the best
1704 overall code size. */
1705 targetm.min_anchor_offset = 0;
1706 targetm.max_anchor_offset = 127;
1708 else if (TARGET_THUMB2)
1710 /* The minimum is set such that the total size of the block
1711 for a particular anchor is 248 + 1 + 4095 bytes, which is
1712 divisible by eight, ensuring natural spacing of anchors. */
1713 targetm.min_anchor_offset = -248;
1714 targetm.max_anchor_offset = 4095;
1717 /* V5 code we generate is completely interworking capable, so we turn off
1718 TARGET_INTERWORK here to avoid many tests later on. */
1720 /* XXX However, we must pass the right pre-processor defines to CPP
1721 or GLD can get confused. This is a hack. */
1722 if (TARGET_INTERWORK)
1723 arm_cpp_interwork = 1;
1726 target_flags &= ~MASK_INTERWORK;
1728 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1729 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1731 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1732 error ("iwmmxt abi requires an iwmmxt capable cpu");
1734 if (!global_options_set.x_arm_fpu_index)
1736 const char *target_fpu_name;
1739 #ifdef FPUTYPE_DEFAULT
1740 target_fpu_name = FPUTYPE_DEFAULT;
1742 if (arm_arch_cirrus)
1743 target_fpu_name = "maverick";
1745 target_fpu_name = "fpe2";
1748 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1753 arm_fpu_desc = &all_fpus[arm_fpu_index];
1755 switch (arm_fpu_desc->model)
1757 case ARM_FP_MODEL_FPA:
1758 if (arm_fpu_desc->rev == 2)
1759 arm_fpu_attr = FPU_FPE2;
1760 else if (arm_fpu_desc->rev == 3)
1761 arm_fpu_attr = FPU_FPE3;
1763 arm_fpu_attr = FPU_FPA;
1766 case ARM_FP_MODEL_MAVERICK:
1767 arm_fpu_attr = FPU_MAVERICK;
1770 case ARM_FP_MODEL_VFP:
1771 arm_fpu_attr = FPU_VFP;
1778 if (TARGET_AAPCS_BASED
1779 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1780 error ("FPA is unsupported in the AAPCS");
1782 if (TARGET_AAPCS_BASED)
1784 if (TARGET_CALLER_INTERWORKING)
1785 error ("AAPCS does not support -mcaller-super-interworking");
1787 if (TARGET_CALLEE_INTERWORKING)
1788 error ("AAPCS does not support -mcallee-super-interworking");
1791 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1792 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1793 will ever exist. GCC makes no attempt to support this combination. */
1794 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1795 sorry ("iWMMXt and hardware floating point");
1797 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1798 if (TARGET_THUMB2 && TARGET_IWMMXT)
1799 sorry ("Thumb-2 iWMMXt");
1801 /* __fp16 support currently assumes the core has ldrh. */
1802 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1803 sorry ("__fp16 and no ldrh");
1805 /* If soft-float is specified then don't use FPU. */
1806 if (TARGET_SOFT_FLOAT)
1807 arm_fpu_attr = FPU_NONE;
1809 if (TARGET_AAPCS_BASED)
1811 if (arm_abi == ARM_ABI_IWMMXT)
1812 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1813 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1814 && TARGET_HARD_FLOAT
1816 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1818 arm_pcs_default = ARM_PCS_AAPCS;
1822 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1823 sorry ("-mfloat-abi=hard and VFP");
1825 if (arm_abi == ARM_ABI_APCS)
1826 arm_pcs_default = ARM_PCS_APCS;
1828 arm_pcs_default = ARM_PCS_ATPCS;
1831 /* For arm2/3 there is no need to do any scheduling if there is only
1832 a floating point emulator, or we are doing software floating-point. */
1833 if ((TARGET_SOFT_FLOAT
1834 || (TARGET_FPA && arm_fpu_desc->rev))
1835 && (tune_flags & FL_MODE32) == 0)
1836 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1838 /* Use the cp15 method if it is available. */
1839 if (target_thread_pointer == TP_AUTO)
1841 if (arm_arch6k && !TARGET_THUMB1)
1842 target_thread_pointer = TP_CP15;
1844 target_thread_pointer = TP_SOFT;
1847 if (TARGET_HARD_TP && TARGET_THUMB1)
1848 error ("can not use -mtp=cp15 with 16-bit Thumb");
1850 /* Override the default structure alignment for AAPCS ABI. */
1851 if (!global_options_set.x_arm_structure_size_boundary)
1853 if (TARGET_AAPCS_BASED)
1854 arm_structure_size_boundary = 8;
1858 if (arm_structure_size_boundary != 8
1859 && arm_structure_size_boundary != 32
1860 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1862 if (ARM_DOUBLEWORD_ALIGN)
1864 "structure size boundary can only be set to 8, 32 or 64");
1866 warning (0, "structure size boundary can only be set to 8 or 32");
1867 arm_structure_size_boundary
1868 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1872 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1874 error ("RTP PIC is incompatible with Thumb");
1878 /* If stack checking is disabled, we can use r10 as the PIC register,
1879 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1880 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1882 if (TARGET_VXWORKS_RTP)
1883 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1884 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1887 if (flag_pic && TARGET_VXWORKS_RTP)
1888 arm_pic_register = 9;
1890 if (arm_pic_register_string != NULL)
1892 int pic_register = decode_reg_name (arm_pic_register_string);
1895 warning (0, "-mpic-register= is useless without -fpic");
1897 /* Prevent the user from choosing an obviously stupid PIC register. */
1898 else if (pic_register < 0 || call_used_regs[pic_register]
1899 || pic_register == HARD_FRAME_POINTER_REGNUM
1900 || pic_register == STACK_POINTER_REGNUM
1901 || pic_register >= PC_REGNUM
1902 || (TARGET_VXWORKS_RTP
1903 && (unsigned int) pic_register != arm_pic_register))
1904 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1906 arm_pic_register = pic_register;
1909 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1910 if (fix_cm3_ldrd == 2)
1912 if (arm_selected_cpu->core == cortexm3)
1918 /* Enable -munaligned-access by default for
1919 - all ARMv6 architecture-based processors
1920 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1922 Disable -munaligned-access by default for
1923 - all pre-ARMv6 architecture-based processors
1924 - ARMv6-M architecture-based processors. */
1926 if (unaligned_access == 2)
1928 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1929 unaligned_access = 1;
1931 unaligned_access = 0;
1933 else if (unaligned_access == 1
1934 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1936 warning (0, "target CPU does not support unaligned accesses");
1937 unaligned_access = 0;
1940 if (TARGET_THUMB1 && flag_schedule_insns)
1942 /* Don't warn since it's on by default in -O2. */
1943 flag_schedule_insns = 0;
1948 /* If optimizing for size, bump the number of instructions that we
1949 are prepared to conditionally execute (even on a StrongARM). */
1950 max_insns_skipped = 6;
1953 max_insns_skipped = current_tune->max_insns_skipped;
1955 /* Hot/Cold partitioning is not currently supported, since we can't
1956 handle literal pool placement in that case. */
1957 if (flag_reorder_blocks_and_partition)
1959 inform (input_location,
1960 "-freorder-blocks-and-partition not supported on this architecture");
1961 flag_reorder_blocks_and_partition = 0;
1962 flag_reorder_blocks = 1;
1966 /* Hoisting PIC address calculations more aggressively provides a small,
1967 but measurable, size reduction for PIC code. Therefore, we decrease
1968 the bar for unrestricted expression hoisting to the cost of PIC address
1969 calculation, which is 2 instructions. */
1970 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1971 global_options.x_param_values,
1972 global_options_set.x_param_values);
1974 /* ARM EABI defaults to strict volatile bitfields. */
1975 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1976 flag_strict_volatile_bitfields = 1;
1978 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
1979 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
1980 if (flag_prefetch_loop_arrays < 0
1983 && current_tune->num_prefetch_slots > 0)
1984 flag_prefetch_loop_arrays = 1;
1986 /* Set up parameters to be used in prefetching algorithm. Do not override the
1987 defaults unless we are tuning for a core we have researched values for. */
1988 if (current_tune->num_prefetch_slots > 0)
1989 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1990 current_tune->num_prefetch_slots,
1991 global_options.x_param_values,
1992 global_options_set.x_param_values);
1993 if (current_tune->l1_cache_line_size >= 0)
1994 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1995 current_tune->l1_cache_line_size,
1996 global_options.x_param_values,
1997 global_options_set.x_param_values);
1998 if (current_tune->l1_cache_size >= 0)
1999 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2000 current_tune->l1_cache_size,
2001 global_options.x_param_values,
2002 global_options_set.x_param_values);
2004 /* Register global variables with the garbage collector. */
2005 arm_add_gc_roots ();
2009 arm_add_gc_roots (void)
2011 gcc_obstack_init(&minipool_obstack);
2012 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2015 /* A table of known ARM exception types.
2016 For use with the interrupt function attribute. */
2020 const char *const arg;
2021 const unsigned long return_value;
2025 static const isr_attribute_arg isr_attribute_args [] =
2027 { "IRQ", ARM_FT_ISR },
2028 { "irq", ARM_FT_ISR },
2029 { "FIQ", ARM_FT_FIQ },
2030 { "fiq", ARM_FT_FIQ },
2031 { "ABORT", ARM_FT_ISR },
2032 { "abort", ARM_FT_ISR },
2033 { "ABORT", ARM_FT_ISR },
2034 { "abort", ARM_FT_ISR },
2035 { "UNDEF", ARM_FT_EXCEPTION },
2036 { "undef", ARM_FT_EXCEPTION },
2037 { "SWI", ARM_FT_EXCEPTION },
2038 { "swi", ARM_FT_EXCEPTION },
2039 { NULL, ARM_FT_NORMAL }
2042 /* Returns the (interrupt) function type of the current
2043 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2045 static unsigned long
2046 arm_isr_value (tree argument)
2048 const isr_attribute_arg * ptr;
2052 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2054 /* No argument - default to IRQ. */
2055 if (argument == NULL_TREE)
2058 /* Get the value of the argument. */
2059 if (TREE_VALUE (argument) == NULL_TREE
2060 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2061 return ARM_FT_UNKNOWN;
2063 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2065 /* Check it against the list of known arguments. */
2066 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2067 if (streq (arg, ptr->arg))
2068 return ptr->return_value;
2070 /* An unrecognized interrupt type. */
2071 return ARM_FT_UNKNOWN;
2074 /* Computes the type of the current function. */
2076 static unsigned long
2077 arm_compute_func_type (void)
2079 unsigned long type = ARM_FT_UNKNOWN;
2083 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2085 /* Decide if the current function is volatile. Such functions
2086 never return, and many memory cycles can be saved by not storing
2087 register values that will never be needed again. This optimization
2088 was added to speed up context switching in a kernel application. */
2090 && (TREE_NOTHROW (current_function_decl)
2091 || !(flag_unwind_tables
2093 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2094 && TREE_THIS_VOLATILE (current_function_decl))
2095 type |= ARM_FT_VOLATILE;
2097 if (cfun->static_chain_decl != NULL)
2098 type |= ARM_FT_NESTED;
2100 attr = DECL_ATTRIBUTES (current_function_decl);
2102 a = lookup_attribute ("naked", attr);
2104 type |= ARM_FT_NAKED;
2106 a = lookup_attribute ("isr", attr);
2108 a = lookup_attribute ("interrupt", attr);
2111 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2113 type |= arm_isr_value (TREE_VALUE (a));
2118 /* Returns the type of the current function. */
2121 arm_current_func_type (void)
2123 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2124 cfun->machine->func_type = arm_compute_func_type ();
2126 return cfun->machine->func_type;
2130 arm_allocate_stack_slots_for_args (void)
2132 /* Naked functions should not allocate stack slots for arguments. */
2133 return !IS_NAKED (arm_current_func_type ());
2137 /* Output assembler code for a block containing the constant parts
2138 of a trampoline, leaving space for the variable parts.
2140 On the ARM, (if r8 is the static chain regnum, and remembering that
2141 referencing pc adds an offset of 8) the trampoline looks like:
2144 .word static chain value
2145 .word function's address
2146 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2149 arm_asm_trampoline_template (FILE *f)
2153 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2154 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2156 else if (TARGET_THUMB2)
2158 /* The Thumb-2 trampoline is similar to the arm implementation.
2159 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2160 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2161 STATIC_CHAIN_REGNUM, PC_REGNUM);
2162 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2166 ASM_OUTPUT_ALIGN (f, 2);
2167 fprintf (f, "\t.code\t16\n");
2168 fprintf (f, ".Ltrampoline_start:\n");
2169 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2170 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2171 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2172 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2173 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2174 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2176 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2177 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2180 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2183 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2185 rtx fnaddr, mem, a_tramp;
2187 emit_block_move (m_tramp, assemble_trampoline_template (),
2188 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2190 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2191 emit_move_insn (mem, chain_value);
2193 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2194 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2195 emit_move_insn (mem, fnaddr);
2197 a_tramp = XEXP (m_tramp, 0);
2198 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2199 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2200 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2203 /* Thumb trampolines should be entered in thumb mode, so set
2204 the bottom bit of the address. */
2207 arm_trampoline_adjust_address (rtx addr)
2210 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2211 NULL, 0, OPTAB_LIB_WIDEN);
2215 /* Return 1 if it is possible to return using a single instruction.
2216 If SIBLING is non-null, this is a test for a return before a sibling
2217 call. SIBLING is the call insn, so we can examine its register usage. */
2220 use_return_insn (int iscond, rtx sibling)
2223 unsigned int func_type;
2224 unsigned long saved_int_regs;
2225 unsigned HOST_WIDE_INT stack_adjust;
2226 arm_stack_offsets *offsets;
2228 /* Never use a return instruction before reload has run. */
2229 if (!reload_completed)
2232 func_type = arm_current_func_type ();
2234 /* Naked, volatile and stack alignment functions need special
2236 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2239 /* So do interrupt functions that use the frame pointer and Thumb
2240 interrupt functions. */
2241 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2244 offsets = arm_get_frame_offsets ();
2245 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2247 /* As do variadic functions. */
2248 if (crtl->args.pretend_args_size
2249 || cfun->machine->uses_anonymous_args
2250 /* Or if the function calls __builtin_eh_return () */
2251 || crtl->calls_eh_return
2252 /* Or if the function calls alloca */
2253 || cfun->calls_alloca
2254 /* Or if there is a stack adjustment. However, if the stack pointer
2255 is saved on the stack, we can use a pre-incrementing stack load. */
2256 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2257 && stack_adjust == 4)))
2260 saved_int_regs = offsets->saved_regs_mask;
2262 /* Unfortunately, the insn
2264 ldmib sp, {..., sp, ...}
2266 triggers a bug on most SA-110 based devices, such that the stack
2267 pointer won't be correctly restored if the instruction takes a
2268 page fault. We work around this problem by popping r3 along with
2269 the other registers, since that is never slower than executing
2270 another instruction.
2272 We test for !arm_arch5 here, because code for any architecture
2273 less than this could potentially be run on one of the buggy
2275 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2277 /* Validate that r3 is a call-clobbered register (always true in
2278 the default abi) ... */
2279 if (!call_used_regs[3])
2282 /* ... that it isn't being used for a return value ... */
2283 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2286 /* ... or for a tail-call argument ... */
2289 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2291 if (find_regno_fusage (sibling, USE, 3))
2295 /* ... and that there are no call-saved registers in r0-r2
2296 (always true in the default ABI). */
2297 if (saved_int_regs & 0x7)
2301 /* Can't be done if interworking with Thumb, and any registers have been
2303 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2306 /* On StrongARM, conditional returns are expensive if they aren't
2307 taken and multiple registers have been stacked. */
2308 if (iscond && arm_tune_strongarm)
2310 /* Conditional return when just the LR is stored is a simple
2311 conditional-load instruction, that's not expensive. */
2312 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2316 && arm_pic_register != INVALID_REGNUM
2317 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2321 /* If there are saved registers but the LR isn't saved, then we need
2322 two instructions for the return. */
2323 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2326 /* Can't be done if any of the FPA regs are pushed,
2327 since this also requires an insn. */
2328 if (TARGET_HARD_FLOAT && TARGET_FPA)
2329 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2330 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2333 /* Likewise VFP regs. */
2334 if (TARGET_HARD_FLOAT && TARGET_VFP)
2335 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2336 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2339 if (TARGET_REALLY_IWMMXT)
2340 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2341 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2347 /* Return TRUE if int I is a valid immediate ARM constant. */
2350 const_ok_for_arm (HOST_WIDE_INT i)
2354 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2355 be all zero, or all one. */
2356 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2357 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2358 != ((~(unsigned HOST_WIDE_INT) 0)
2359 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2362 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2364 /* Fast return for 0 and small values. We must do this for zero, since
2365 the code below can't handle that one case. */
2366 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2369 /* Get the number of trailing zeros. */
2370 lowbit = ffs((int) i) - 1;
2372 /* Only even shifts are allowed in ARM mode so round down to the
2373 nearest even number. */
2377 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2382 /* Allow rotated constants in ARM mode. */
2384 && ((i & ~0xc000003f) == 0
2385 || (i & ~0xf000000f) == 0
2386 || (i & ~0xfc000003) == 0))
2393 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2396 if (i == v || i == (v | (v << 8)))
2399 /* Allow repeated pattern 0xXY00XY00. */
2409 /* Return true if I is a valid constant for the operation CODE. */
2411 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2413 if (const_ok_for_arm (i))
2419 /* See if we can use movw. */
2420 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2423 /* Otherwise, try mvn. */
2424 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2427 /* See if we can use addw or subw. */
2429 && ((i & 0xfffff000) == 0
2430 || ((-i) & 0xfffff000) == 0))
2432 /* else fall through. */
2452 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2454 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2460 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2464 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2471 /* Emit a sequence of insns to handle a large constant.
2472 CODE is the code of the operation required, it can be any of SET, PLUS,
2473 IOR, AND, XOR, MINUS;
2474 MODE is the mode in which the operation is being performed;
2475 VAL is the integer to operate on;
2476 SOURCE is the other operand (a register, or a null-pointer for SET);
2477 SUBTARGETS means it is safe to create scratch registers if that will
2478 either produce a simpler sequence, or we will want to cse the values.
2479 Return value is the number of insns emitted. */
2481 /* ??? Tweak this for thumb2. */
2483 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2484 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2488 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2489 cond = COND_EXEC_TEST (PATTERN (insn));
2493 if (subtargets || code == SET
2494 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2495 && REGNO (target) != REGNO (source)))
2497 /* After arm_reorg has been called, we can't fix up expensive
2498 constants by pushing them into memory so we must synthesize
2499 them in-line, regardless of the cost. This is only likely to
2500 be more costly on chips that have load delay slots and we are
2501 compiling without running the scheduler (so no splitting
2502 occurred before the final instruction emission).
2504 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2506 if (!after_arm_reorg
2508 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2510 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2515 /* Currently SET is the only monadic value for CODE, all
2516 the rest are diadic. */
2517 if (TARGET_USE_MOVT)
2518 arm_emit_movpair (target, GEN_INT (val));
2520 emit_set_insn (target, GEN_INT (val));
2526 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2528 if (TARGET_USE_MOVT)
2529 arm_emit_movpair (temp, GEN_INT (val));
2531 emit_set_insn (temp, GEN_INT (val));
2533 /* For MINUS, the value is subtracted from, since we never
2534 have subtraction of a constant. */
2536 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2538 emit_set_insn (target,
2539 gen_rtx_fmt_ee (code, mode, source, temp));
2545 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2549 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2550 ARM/THUMB2 immediates, and add up to VAL.
2551 Thr function return value gives the number of insns required. */
2553 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2554 struct four_ints *return_sequence)
2556 int best_consecutive_zeros = 0;
2560 struct four_ints tmp_sequence;
2562 /* If we aren't targetting ARM, the best place to start is always at
2563 the bottom, otherwise look more closely. */
2566 for (i = 0; i < 32; i += 2)
2568 int consecutive_zeros = 0;
2570 if (!(val & (3 << i)))
2572 while ((i < 32) && !(val & (3 << i)))
2574 consecutive_zeros += 2;
2577 if (consecutive_zeros > best_consecutive_zeros)
2579 best_consecutive_zeros = consecutive_zeros;
2580 best_start = i - consecutive_zeros;
2587 /* So long as it won't require any more insns to do so, it's
2588 desirable to emit a small constant (in bits 0...9) in the last
2589 insn. This way there is more chance that it can be combined with
2590 a later addressing insn to form a pre-indexed load or store
2591 operation. Consider:
2593 *((volatile int *)0xe0000100) = 1;
2594 *((volatile int *)0xe0000110) = 2;
2596 We want this to wind up as:
2600 str rB, [rA, #0x100]
2602 str rB, [rA, #0x110]
2604 rather than having to synthesize both large constants from scratch.
2606 Therefore, we calculate how many insns would be required to emit
2607 the constant starting from `best_start', and also starting from
2608 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2609 yield a shorter sequence, we may as well use zero. */
2610 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2612 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2614 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2615 if (insns2 <= insns1)
2617 *return_sequence = tmp_sequence;
2625 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2627 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2628 struct four_ints *return_sequence, int i)
2630 int remainder = val & 0xffffffff;
2633 /* Try and find a way of doing the job in either two or three
2636 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2637 location. We start at position I. This may be the MSB, or
2638 optimial_immediate_sequence may have positioned it at the largest block
2639 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2640 wrapping around to the top of the word when we drop off the bottom.
2641 In the worst case this code should produce no more than four insns.
2643 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2644 constants, shifted to any arbitrary location. We should always start
2649 unsigned int b1, b2, b3, b4;
2650 unsigned HOST_WIDE_INT result;
2653 gcc_assert (insns < 4);
2658 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2659 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2662 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2663 /* We can use addw/subw for the last 12 bits. */
2667 /* Use an 8-bit shifted/rotated immediate. */
2671 result = remainder & ((0x0ff << end)
2672 | ((i < end) ? (0xff >> (32 - end))
2679 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2680 arbitrary shifts. */
2681 i -= TARGET_ARM ? 2 : 1;
2685 /* Next, see if we can do a better job with a thumb2 replicated
2688 We do it this way around to catch the cases like 0x01F001E0 where
2689 two 8-bit immediates would work, but a replicated constant would
2692 TODO: 16-bit constants that don't clear all the bits, but still win.
2693 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2696 b1 = (remainder & 0xff000000) >> 24;
2697 b2 = (remainder & 0x00ff0000) >> 16;
2698 b3 = (remainder & 0x0000ff00) >> 8;
2699 b4 = remainder & 0xff;
2703 /* The 8-bit immediate already found clears b1 (and maybe b2),
2704 but must leave b3 and b4 alone. */
2706 /* First try to find a 32-bit replicated constant that clears
2707 almost everything. We can assume that we can't do it in one,
2708 or else we wouldn't be here. */
2709 unsigned int tmp = b1 & b2 & b3 & b4;
2710 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2712 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2713 + (tmp == b3) + (tmp == b4);
2715 && (matching_bytes >= 3
2716 || (matching_bytes == 2
2717 && const_ok_for_op (remainder & ~tmp2, code))))
2719 /* At least 3 of the bytes match, and the fourth has at
2720 least as many bits set, or two of the bytes match
2721 and it will only require one more insn to finish. */
2729 /* Second, try to find a 16-bit replicated constant that can
2730 leave three of the bytes clear. If b2 or b4 is already
2731 zero, then we can. If the 8-bit from above would not
2732 clear b2 anyway, then we still win. */
2733 else if (b1 == b3 && (!b2 || !b4
2734 || (remainder & 0x00ff0000 & ~result)))
2736 result = remainder & 0xff00ff00;
2742 /* The 8-bit immediate already found clears b2 (and maybe b3)
2743 and we don't get here unless b1 is alredy clear, but it will
2744 leave b4 unchanged. */
2746 /* If we can clear b2 and b4 at once, then we win, since the
2747 8-bits couldn't possibly reach that far. */
2750 result = remainder & 0x00ff00ff;
2756 return_sequence->i[insns++] = result;
2757 remainder &= ~result;
2759 if (code == SET || code == MINUS)
2767 /* Emit an instruction with the indicated PATTERN. If COND is
2768 non-NULL, conditionalize the execution of the instruction on COND
2772 emit_constant_insn (rtx cond, rtx pattern)
2775 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2776 emit_insn (pattern);
2779 /* As above, but extra parameter GENERATE which, if clear, suppresses
2783 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2784 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2789 int final_invert = 0;
2791 int set_sign_bit_copies = 0;
2792 int clear_sign_bit_copies = 0;
2793 int clear_zero_bit_copies = 0;
2794 int set_zero_bit_copies = 0;
2795 int insns = 0, neg_insns, inv_insns;
2796 unsigned HOST_WIDE_INT temp1, temp2;
2797 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2798 struct four_ints *immediates;
2799 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2801 /* Find out which operations are safe for a given CODE. Also do a quick
2802 check for degenerate cases; these can occur when DImode operations
2815 if (remainder == 0xffffffff)
2818 emit_constant_insn (cond,
2819 gen_rtx_SET (VOIDmode, target,
2820 GEN_INT (ARM_SIGN_EXTEND (val))));
2826 if (reload_completed && rtx_equal_p (target, source))
2830 emit_constant_insn (cond,
2831 gen_rtx_SET (VOIDmode, target, source));
2840 emit_constant_insn (cond,
2841 gen_rtx_SET (VOIDmode, target, const0_rtx));
2844 if (remainder == 0xffffffff)
2846 if (reload_completed && rtx_equal_p (target, source))
2849 emit_constant_insn (cond,
2850 gen_rtx_SET (VOIDmode, target, source));
2859 if (reload_completed && rtx_equal_p (target, source))
2862 emit_constant_insn (cond,
2863 gen_rtx_SET (VOIDmode, target, source));
2867 if (remainder == 0xffffffff)
2870 emit_constant_insn (cond,
2871 gen_rtx_SET (VOIDmode, target,
2872 gen_rtx_NOT (mode, source)));
2879 /* We treat MINUS as (val - source), since (source - val) is always
2880 passed as (source + (-val)). */
2884 emit_constant_insn (cond,
2885 gen_rtx_SET (VOIDmode, target,
2886 gen_rtx_NEG (mode, source)));
2889 if (const_ok_for_arm (val))
2892 emit_constant_insn (cond,
2893 gen_rtx_SET (VOIDmode, target,
2894 gen_rtx_MINUS (mode, GEN_INT (val),
2905 /* If we can do it in one insn get out quickly. */
2906 if (const_ok_for_op (val, code))
2909 emit_constant_insn (cond,
2910 gen_rtx_SET (VOIDmode, target,
2912 ? gen_rtx_fmt_ee (code, mode, source,
2918 /* Calculate a few attributes that may be useful for specific
2920 /* Count number of leading zeros. */
2921 for (i = 31; i >= 0; i--)
2923 if ((remainder & (1 << i)) == 0)
2924 clear_sign_bit_copies++;
2929 /* Count number of leading 1's. */
2930 for (i = 31; i >= 0; i--)
2932 if ((remainder & (1 << i)) != 0)
2933 set_sign_bit_copies++;
2938 /* Count number of trailing zero's. */
2939 for (i = 0; i <= 31; i++)
2941 if ((remainder & (1 << i)) == 0)
2942 clear_zero_bit_copies++;
2947 /* Count number of trailing 1's. */
2948 for (i = 0; i <= 31; i++)
2950 if ((remainder & (1 << i)) != 0)
2951 set_zero_bit_copies++;
2959 /* See if we can do this by sign_extending a constant that is known
2960 to be negative. This is a good, way of doing it, since the shift
2961 may well merge into a subsequent insn. */
2962 if (set_sign_bit_copies > 1)
2964 if (const_ok_for_arm
2965 (temp1 = ARM_SIGN_EXTEND (remainder
2966 << (set_sign_bit_copies - 1))))
2970 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2971 emit_constant_insn (cond,
2972 gen_rtx_SET (VOIDmode, new_src,
2974 emit_constant_insn (cond,
2975 gen_ashrsi3 (target, new_src,
2976 GEN_INT (set_sign_bit_copies - 1)));
2980 /* For an inverted constant, we will need to set the low bits,
2981 these will be shifted out of harm's way. */
2982 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2983 if (const_ok_for_arm (~temp1))
2987 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2988 emit_constant_insn (cond,
2989 gen_rtx_SET (VOIDmode, new_src,
2991 emit_constant_insn (cond,
2992 gen_ashrsi3 (target, new_src,
2993 GEN_INT (set_sign_bit_copies - 1)));
2999 /* See if we can calculate the value as the difference between two
3000 valid immediates. */
3001 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3003 int topshift = clear_sign_bit_copies & ~1;
3005 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3006 & (0xff000000 >> topshift));
3008 /* If temp1 is zero, then that means the 9 most significant
3009 bits of remainder were 1 and we've caused it to overflow.
3010 When topshift is 0 we don't need to do anything since we
3011 can borrow from 'bit 32'. */
3012 if (temp1 == 0 && topshift != 0)
3013 temp1 = 0x80000000 >> (topshift - 1);
3015 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3017 if (const_ok_for_arm (temp2))
3021 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3022 emit_constant_insn (cond,
3023 gen_rtx_SET (VOIDmode, new_src,
3025 emit_constant_insn (cond,
3026 gen_addsi3 (target, new_src,
3034 /* See if we can generate this by setting the bottom (or the top)
3035 16 bits, and then shifting these into the other half of the
3036 word. We only look for the simplest cases, to do more would cost
3037 too much. Be careful, however, not to generate this when the
3038 alternative would take fewer insns. */
3039 if (val & 0xffff0000)
3041 temp1 = remainder & 0xffff0000;
3042 temp2 = remainder & 0x0000ffff;
3044 /* Overlaps outside this range are best done using other methods. */
3045 for (i = 9; i < 24; i++)
3047 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3048 && !const_ok_for_arm (temp2))
3050 rtx new_src = (subtargets
3051 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3053 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3054 source, subtargets, generate);
3062 gen_rtx_ASHIFT (mode, source,
3069 /* Don't duplicate cases already considered. */
3070 for (i = 17; i < 24; i++)
3072 if (((temp1 | (temp1 >> i)) == remainder)
3073 && !const_ok_for_arm (temp1))
3075 rtx new_src = (subtargets
3076 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3078 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3079 source, subtargets, generate);
3084 gen_rtx_SET (VOIDmode, target,
3087 gen_rtx_LSHIFTRT (mode, source,
3098 /* If we have IOR or XOR, and the constant can be loaded in a
3099 single instruction, and we can find a temporary to put it in,
3100 then this can be done in two instructions instead of 3-4. */
3102 /* TARGET can't be NULL if SUBTARGETS is 0 */
3103 || (reload_completed && !reg_mentioned_p (target, source)))
3105 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3109 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3111 emit_constant_insn (cond,
3112 gen_rtx_SET (VOIDmode, sub,
3114 emit_constant_insn (cond,
3115 gen_rtx_SET (VOIDmode, target,
3116 gen_rtx_fmt_ee (code, mode,
3127 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3128 and the remainder 0s for e.g. 0xfff00000)
3129 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3131 This can be done in 2 instructions by using shifts with mov or mvn.
3136 mvn r0, r0, lsr #12 */
3137 if (set_sign_bit_copies > 8
3138 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3142 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3143 rtx shift = GEN_INT (set_sign_bit_copies);
3147 gen_rtx_SET (VOIDmode, sub,
3149 gen_rtx_ASHIFT (mode,
3154 gen_rtx_SET (VOIDmode, target,
3156 gen_rtx_LSHIFTRT (mode, sub,
3163 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3165 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3167 For eg. r0 = r0 | 0xfff
3172 if (set_zero_bit_copies > 8
3173 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3177 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3178 rtx shift = GEN_INT (set_zero_bit_copies);
3182 gen_rtx_SET (VOIDmode, sub,
3184 gen_rtx_LSHIFTRT (mode,
3189 gen_rtx_SET (VOIDmode, target,
3191 gen_rtx_ASHIFT (mode, sub,
3197 /* This will never be reached for Thumb2 because orn is a valid
3198 instruction. This is for Thumb1 and the ARM 32 bit cases.
3200 x = y | constant (such that ~constant is a valid constant)
3202 x = ~(~y & ~constant).
3204 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3208 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3209 emit_constant_insn (cond,
3210 gen_rtx_SET (VOIDmode, sub,
3211 gen_rtx_NOT (mode, source)));
3214 sub = gen_reg_rtx (mode);
3215 emit_constant_insn (cond,
3216 gen_rtx_SET (VOIDmode, sub,
3217 gen_rtx_AND (mode, source,
3219 emit_constant_insn (cond,
3220 gen_rtx_SET (VOIDmode, target,
3221 gen_rtx_NOT (mode, sub)));
3228 /* See if two shifts will do 2 or more insn's worth of work. */
3229 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3231 HOST_WIDE_INT shift_mask = ((0xffffffff
3232 << (32 - clear_sign_bit_copies))
3235 if ((remainder | shift_mask) != 0xffffffff)
3239 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3240 insns = arm_gen_constant (AND, mode, cond,
3241 remainder | shift_mask,
3242 new_src, source, subtargets, 1);
3247 rtx targ = subtargets ? NULL_RTX : target;
3248 insns = arm_gen_constant (AND, mode, cond,
3249 remainder | shift_mask,
3250 targ, source, subtargets, 0);
3256 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3257 rtx shift = GEN_INT (clear_sign_bit_copies);
3259 emit_insn (gen_ashlsi3 (new_src, source, shift));
3260 emit_insn (gen_lshrsi3 (target, new_src, shift));
3266 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3268 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3270 if ((remainder | shift_mask) != 0xffffffff)
3274 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3276 insns = arm_gen_constant (AND, mode, cond,
3277 remainder | shift_mask,
3278 new_src, source, subtargets, 1);
3283 rtx targ = subtargets ? NULL_RTX : target;
3285 insns = arm_gen_constant (AND, mode, cond,
3286 remainder | shift_mask,
3287 targ, source, subtargets, 0);
3293 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3294 rtx shift = GEN_INT (clear_zero_bit_copies);
3296 emit_insn (gen_lshrsi3 (new_src, source, shift));
3297 emit_insn (gen_ashlsi3 (target, new_src, shift));
3309 /* Calculate what the instruction sequences would be if we generated it
3310 normally, negated, or inverted. */
3312 /* AND cannot be split into multiple insns, so invert and use BIC. */
3315 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3318 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3323 if (can_invert || final_invert)
3324 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3329 immediates = &pos_immediates;
3331 /* Is the negated immediate sequence more efficient? */
3332 if (neg_insns < insns && neg_insns <= inv_insns)
3335 immediates = &neg_immediates;
3340 /* Is the inverted immediate sequence more efficient?
3341 We must allow for an extra NOT instruction for XOR operations, although
3342 there is some chance that the final 'mvn' will get optimized later. */
3343 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3346 immediates = &inv_immediates;
3354 /* Now output the chosen sequence as instructions. */
3357 for (i = 0; i < insns; i++)
3359 rtx new_src, temp1_rtx;
3361 temp1 = immediates->i[i];
3363 if (code == SET || code == MINUS)
3364 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3365 else if ((final_invert || i < (insns - 1)) && subtargets)
3366 new_src = gen_reg_rtx (mode);
3372 else if (can_negate)
3375 temp1 = trunc_int_for_mode (temp1, mode);
3376 temp1_rtx = GEN_INT (temp1);
3380 else if (code == MINUS)
3381 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3383 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3385 emit_constant_insn (cond,
3386 gen_rtx_SET (VOIDmode, new_src,
3392 can_negate = can_invert;
3396 else if (code == MINUS)
3404 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3405 gen_rtx_NOT (mode, source)));
3412 /* Canonicalize a comparison so that we are more likely to recognize it.
3413 This can be done for a few constant compares, where we can make the
3414 immediate value easier to load. */
3417 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3419 enum machine_mode mode;
3420 unsigned HOST_WIDE_INT i, maxval;
3422 mode = GET_MODE (*op0);
3423 if (mode == VOIDmode)
3424 mode = GET_MODE (*op1);
3426 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3428 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3429 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3430 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3431 for GTU/LEU in Thumb mode. */
3436 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3438 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3441 if (code == GT || code == LE
3442 || (!TARGET_ARM && (code == GTU || code == LEU)))
3444 /* Missing comparison. First try to use an available
3446 if (GET_CODE (*op1) == CONST_INT)
3454 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3456 *op1 = GEN_INT (i + 1);
3457 return code == GT ? GE : LT;
3462 if (i != ~((unsigned HOST_WIDE_INT) 0)
3463 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3465 *op1 = GEN_INT (i + 1);
3466 return code == GTU ? GEU : LTU;
3474 /* If that did not work, reverse the condition. */
3478 return swap_condition (code);
3484 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3485 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3486 to facilitate possible combining with a cmp into 'ands'. */
3488 && GET_CODE (*op0) == ZERO_EXTEND
3489 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3490 && GET_MODE (XEXP (*op0, 0)) == QImode
3491 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3492 && subreg_lowpart_p (XEXP (*op0, 0))
3493 && *op1 == const0_rtx)
3494 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3497 /* Comparisons smaller than DImode. Only adjust comparisons against
3498 an out-of-range constant. */
3499 if (GET_CODE (*op1) != CONST_INT
3500 || const_ok_for_arm (INTVAL (*op1))
3501 || const_ok_for_arm (- INTVAL (*op1)))
3515 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3517 *op1 = GEN_INT (i + 1);
3518 return code == GT ? GE : LT;
3525 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3527 *op1 = GEN_INT (i - 1);
3528 return code == GE ? GT : LE;
3534 if (i != ~((unsigned HOST_WIDE_INT) 0)
3535 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3537 *op1 = GEN_INT (i + 1);
3538 return code == GTU ? GEU : LTU;
3545 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3547 *op1 = GEN_INT (i - 1);
3548 return code == GEU ? GTU : LEU;
3560 /* Define how to find the value returned by a function. */
3563 arm_function_value(const_tree type, const_tree func,
3564 bool outgoing ATTRIBUTE_UNUSED)
3566 enum machine_mode mode;
3567 int unsignedp ATTRIBUTE_UNUSED;
3568 rtx r ATTRIBUTE_UNUSED;
3570 mode = TYPE_MODE (type);
3572 if (TARGET_AAPCS_BASED)
3573 return aapcs_allocate_return_reg (mode, type, func);
3575 /* Promote integer types. */
3576 if (INTEGRAL_TYPE_P (type))
3577 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3579 /* Promotes small structs returned in a register to full-word size
3580 for big-endian AAPCS. */
3581 if (arm_return_in_msb (type))
3583 HOST_WIDE_INT size = int_size_in_bytes (type);
3584 if (size % UNITS_PER_WORD != 0)
3586 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3587 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3591 return LIBCALL_VALUE (mode);
3595 libcall_eq (const void *p1, const void *p2)
3597 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3601 libcall_hash (const void *p1)
3603 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3607 add_libcall (htab_t htab, rtx libcall)
3609 *htab_find_slot (htab, libcall, INSERT) = libcall;
3613 arm_libcall_uses_aapcs_base (const_rtx libcall)
3615 static bool init_done = false;
3616 static htab_t libcall_htab;
3622 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3624 add_libcall (libcall_htab,
3625 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3626 add_libcall (libcall_htab,
3627 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3628 add_libcall (libcall_htab,
3629 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3630 add_libcall (libcall_htab,
3631 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3633 add_libcall (libcall_htab,
3634 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3635 add_libcall (libcall_htab,
3636 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3637 add_libcall (libcall_htab,
3638 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3639 add_libcall (libcall_htab,
3640 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3642 add_libcall (libcall_htab,
3643 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3644 add_libcall (libcall_htab,
3645 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3646 add_libcall (libcall_htab,
3647 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3648 add_libcall (libcall_htab,
3649 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3650 add_libcall (libcall_htab,
3651 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3652 add_libcall (libcall_htab,
3653 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3655 /* Values from double-precision helper functions are returned in core
3656 registers if the selected core only supports single-precision
3657 arithmetic, even if we are using the hard-float ABI. The same is
3658 true for single-precision helpers, but we will never be using the
3659 hard-float ABI on a CPU which doesn't support single-precision
3660 operations in hardware. */
3661 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3662 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3663 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3664 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3665 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3666 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3667 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3668 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3669 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3670 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3671 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3672 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3674 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3678 return libcall && htab_find (libcall_htab, libcall) != NULL;
3682 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3684 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3685 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3687 /* The following libcalls return their result in integer registers,
3688 even though they return a floating point value. */
3689 if (arm_libcall_uses_aapcs_base (libcall))
3690 return gen_rtx_REG (mode, ARG_REGISTER(1));
3694 return LIBCALL_VALUE (mode);
3697 /* Determine the amount of memory needed to store the possible return
3698 registers of an untyped call. */
3700 arm_apply_result_size (void)
3706 if (TARGET_HARD_FLOAT_ABI)
3712 if (TARGET_MAVERICK)
3715 if (TARGET_IWMMXT_ABI)
3722 /* Decide whether TYPE should be returned in memory (true)
3723 or in a register (false). FNTYPE is the type of the function making
3726 arm_return_in_memory (const_tree type, const_tree fntype)
3730 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3732 if (TARGET_AAPCS_BASED)
3734 /* Simple, non-aggregate types (ie not including vectors and
3735 complex) are always returned in a register (or registers).
3736 We don't care about which register here, so we can short-cut
3737 some of the detail. */
3738 if (!AGGREGATE_TYPE_P (type)
3739 && TREE_CODE (type) != VECTOR_TYPE
3740 && TREE_CODE (type) != COMPLEX_TYPE)
3743 /* Any return value that is no larger than one word can be
3745 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3748 /* Check any available co-processors to see if they accept the
3749 type as a register candidate (VFP, for example, can return
3750 some aggregates in consecutive registers). These aren't
3751 available if the call is variadic. */
3752 if (aapcs_select_return_coproc (type, fntype) >= 0)
3755 /* Vector values should be returned using ARM registers, not
3756 memory (unless they're over 16 bytes, which will break since
3757 we only have four call-clobbered registers to play with). */
3758 if (TREE_CODE (type) == VECTOR_TYPE)
3759 return (size < 0 || size > (4 * UNITS_PER_WORD));
3761 /* The rest go in memory. */
3765 if (TREE_CODE (type) == VECTOR_TYPE)
3766 return (size < 0 || size > (4 * UNITS_PER_WORD));
3768 if (!AGGREGATE_TYPE_P (type) &&
3769 (TREE_CODE (type) != VECTOR_TYPE))
3770 /* All simple types are returned in registers. */
3773 if (arm_abi != ARM_ABI_APCS)
3775 /* ATPCS and later return aggregate types in memory only if they are
3776 larger than a word (or are variable size). */
3777 return (size < 0 || size > UNITS_PER_WORD);
3780 /* For the arm-wince targets we choose to be compatible with Microsoft's
3781 ARM and Thumb compilers, which always return aggregates in memory. */
3783 /* All structures/unions bigger than one word are returned in memory.
3784 Also catch the case where int_size_in_bytes returns -1. In this case
3785 the aggregate is either huge or of variable size, and in either case
3786 we will want to return it via memory and not in a register. */
3787 if (size < 0 || size > UNITS_PER_WORD)
3790 if (TREE_CODE (type) == RECORD_TYPE)
3794 /* For a struct the APCS says that we only return in a register
3795 if the type is 'integer like' and every addressable element
3796 has an offset of zero. For practical purposes this means
3797 that the structure can have at most one non bit-field element
3798 and that this element must be the first one in the structure. */
3800 /* Find the first field, ignoring non FIELD_DECL things which will
3801 have been created by C++. */
3802 for (field = TYPE_FIELDS (type);
3803 field && TREE_CODE (field) != FIELD_DECL;
3804 field = DECL_CHAIN (field))
3808 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3810 /* Check that the first field is valid for returning in a register. */
3812 /* ... Floats are not allowed */
3813 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3816 /* ... Aggregates that are not themselves valid for returning in
3817 a register are not allowed. */
3818 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3821 /* Now check the remaining fields, if any. Only bitfields are allowed,
3822 since they are not addressable. */
3823 for (field = DECL_CHAIN (field);
3825 field = DECL_CHAIN (field))
3827 if (TREE_CODE (field) != FIELD_DECL)
3830 if (!DECL_BIT_FIELD_TYPE (field))
3837 if (TREE_CODE (type) == UNION_TYPE)
3841 /* Unions can be returned in registers if every element is
3842 integral, or can be returned in an integer register. */
3843 for (field = TYPE_FIELDS (type);
3845 field = DECL_CHAIN (field))
3847 if (TREE_CODE (field) != FIELD_DECL)
3850 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3853 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3859 #endif /* not ARM_WINCE */
3861 /* Return all other types in memory. */
3865 /* Indicate whether or not words of a double are in big-endian order. */
3868 arm_float_words_big_endian (void)
3870 if (TARGET_MAVERICK)
3873 /* For FPA, float words are always big-endian. For VFP, floats words
3874 follow the memory system mode. */
3882 return (TARGET_BIG_END ? 1 : 0);
3887 const struct pcs_attribute_arg
3891 } pcs_attribute_args[] =
3893 {"aapcs", ARM_PCS_AAPCS},
3894 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3896 /* We could recognize these, but changes would be needed elsewhere
3897 * to implement them. */
3898 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3899 {"atpcs", ARM_PCS_ATPCS},
3900 {"apcs", ARM_PCS_APCS},
3902 {NULL, ARM_PCS_UNKNOWN}
3906 arm_pcs_from_attribute (tree attr)
3908 const struct pcs_attribute_arg *ptr;
3911 /* Get the value of the argument. */
3912 if (TREE_VALUE (attr) == NULL_TREE
3913 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3914 return ARM_PCS_UNKNOWN;
3916 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3918 /* Check it against the list of known arguments. */
3919 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3920 if (streq (arg, ptr->arg))
3923 /* An unrecognized interrupt type. */
3924 return ARM_PCS_UNKNOWN;
3927 /* Get the PCS variant to use for this call. TYPE is the function's type
3928 specification, DECL is the specific declartion. DECL may be null if
3929 the call could be indirect or if this is a library call. */
3931 arm_get_pcs_model (const_tree type, const_tree decl)
3933 bool user_convention = false;
3934 enum arm_pcs user_pcs = arm_pcs_default;
3939 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3942 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3943 user_convention = true;
3946 if (TARGET_AAPCS_BASED)
3948 /* Detect varargs functions. These always use the base rules
3949 (no argument is ever a candidate for a co-processor
3951 bool base_rules = stdarg_p (type);
3953 if (user_convention)
3955 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3956 sorry ("non-AAPCS derived PCS variant");
3957 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3958 error ("variadic functions must use the base AAPCS variant");
3962 return ARM_PCS_AAPCS;
3963 else if (user_convention)
3965 else if (decl && flag_unit_at_a_time)
3967 /* Local functions never leak outside this compilation unit,
3968 so we are free to use whatever conventions are
3970 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3971 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3973 return ARM_PCS_AAPCS_LOCAL;
3976 else if (user_convention && user_pcs != arm_pcs_default)
3977 sorry ("PCS variant");
3979 /* For everything else we use the target's default. */
3980 return arm_pcs_default;
3985 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3986 const_tree fntype ATTRIBUTE_UNUSED,
3987 rtx libcall ATTRIBUTE_UNUSED,
3988 const_tree fndecl ATTRIBUTE_UNUSED)
3990 /* Record the unallocated VFP registers. */
3991 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3992 pcum->aapcs_vfp_reg_alloc = 0;
3995 /* Walk down the type tree of TYPE counting consecutive base elements.
3996 If *MODEP is VOIDmode, then set it to the first valid floating point
3997 type. If a non-floating point type is found, or if a floating point
3998 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3999 otherwise return the count in the sub-tree. */
4001 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4003 enum machine_mode mode;
4006 switch (TREE_CODE (type))
4009 mode = TYPE_MODE (type);
4010 if (mode != DFmode && mode != SFmode)
4013 if (*modep == VOIDmode)
4022 mode = TYPE_MODE (TREE_TYPE (type));
4023 if (mode != DFmode && mode != SFmode)
4026 if (*modep == VOIDmode)
4035 /* Use V2SImode and V4SImode as representatives of all 64-bit
4036 and 128-bit vector types, whether or not those modes are
4037 supported with the present options. */
4038 size = int_size_in_bytes (type);
4051 if (*modep == VOIDmode)
4054 /* Vector modes are considered to be opaque: two vectors are
4055 equivalent for the purposes of being homogeneous aggregates
4056 if they are the same size. */
4065 tree index = TYPE_DOMAIN (type);
4067 /* Can't handle incomplete types. */
4068 if (!COMPLETE_TYPE_P(type))
4071 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4074 || !TYPE_MAX_VALUE (index)
4075 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4076 || !TYPE_MIN_VALUE (index)
4077 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4081 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4082 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4084 /* There must be no padding. */
4085 if (!host_integerp (TYPE_SIZE (type), 1)
4086 || (tree_low_cst (TYPE_SIZE (type), 1)
4087 != count * GET_MODE_BITSIZE (*modep)))
4099 /* Can't handle incomplete types. */
4100 if (!COMPLETE_TYPE_P(type))
4103 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4105 if (TREE_CODE (field) != FIELD_DECL)
4108 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4114 /* There must be no padding. */
4115 if (!host_integerp (TYPE_SIZE (type), 1)
4116 || (tree_low_cst (TYPE_SIZE (type), 1)
4117 != count * GET_MODE_BITSIZE (*modep)))
4124 case QUAL_UNION_TYPE:
4126 /* These aren't very interesting except in a degenerate case. */
4131 /* Can't handle incomplete types. */
4132 if (!COMPLETE_TYPE_P(type))
4135 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4137 if (TREE_CODE (field) != FIELD_DECL)
4140 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4143 count = count > sub_count ? count : sub_count;
4146 /* There must be no padding. */
4147 if (!host_integerp (TYPE_SIZE (type), 1)
4148 || (tree_low_cst (TYPE_SIZE (type), 1)
4149 != count * GET_MODE_BITSIZE (*modep)))
4162 /* Return true if PCS_VARIANT should use VFP registers. */
4164 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4166 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4168 static bool seen_thumb1_vfp = false;
4170 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4172 sorry ("Thumb-1 hard-float VFP ABI");
4173 /* sorry() is not immediately fatal, so only display this once. */
4174 seen_thumb1_vfp = true;
4180 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4183 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4184 (TARGET_VFP_DOUBLE || !is_double));
4188 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4189 enum machine_mode mode, const_tree type,
4190 enum machine_mode *base_mode, int *count)
4192 enum machine_mode new_mode = VOIDmode;
4194 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4195 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4196 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4201 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4204 new_mode = (mode == DCmode ? DFmode : SFmode);
4206 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
4208 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4210 if (ag_count > 0 && ag_count <= 4)
4219 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4222 *base_mode = new_mode;
4227 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4228 enum machine_mode mode, const_tree type)
4230 int count ATTRIBUTE_UNUSED;
4231 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4233 if (!use_vfp_abi (pcs_variant, false))
4235 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4240 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4243 if (!use_vfp_abi (pcum->pcs_variant, false))
4246 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4247 &pcum->aapcs_vfp_rmode,
4248 &pcum->aapcs_vfp_rcount);
4252 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4253 const_tree type ATTRIBUTE_UNUSED)
4255 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4256 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4259 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4260 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4262 pcum->aapcs_vfp_reg_alloc = mask << regno;
4263 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4266 int rcount = pcum->aapcs_vfp_rcount;
4268 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4272 /* Avoid using unsupported vector modes. */
4273 if (rmode == V2SImode)
4275 else if (rmode == V4SImode)
4282 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4283 for (i = 0; i < rcount; i++)
4285 rtx tmp = gen_rtx_REG (rmode,
4286 FIRST_VFP_REGNUM + regno + i * rshift);
4287 tmp = gen_rtx_EXPR_LIST
4289 GEN_INT (i * GET_MODE_SIZE (rmode)));
4290 XVECEXP (par, 0, i) = tmp;
4293 pcum->aapcs_reg = par;
4296 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4303 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4304 enum machine_mode mode,
4305 const_tree type ATTRIBUTE_UNUSED)
4307 if (!use_vfp_abi (pcs_variant, false))
4310 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4313 enum machine_mode ag_mode;
4318 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4323 if (ag_mode == V2SImode)
4325 else if (ag_mode == V4SImode)
4331 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4332 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4333 for (i = 0; i < count; i++)
4335 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4336 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4337 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4338 XVECEXP (par, 0, i) = tmp;
4344 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4348 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4349 enum machine_mode mode ATTRIBUTE_UNUSED,
4350 const_tree type ATTRIBUTE_UNUSED)
4352 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4353 pcum->aapcs_vfp_reg_alloc = 0;
4357 #define AAPCS_CP(X) \
4359 aapcs_ ## X ## _cum_init, \
4360 aapcs_ ## X ## _is_call_candidate, \
4361 aapcs_ ## X ## _allocate, \
4362 aapcs_ ## X ## _is_return_candidate, \
4363 aapcs_ ## X ## _allocate_return_reg, \
4364 aapcs_ ## X ## _advance \
4367 /* Table of co-processors that can be used to pass arguments in
4368 registers. Idealy no arugment should be a candidate for more than
4369 one co-processor table entry, but the table is processed in order
4370 and stops after the first match. If that entry then fails to put
4371 the argument into a co-processor register, the argument will go on
4375 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4376 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4378 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4379 BLKmode) is a candidate for this co-processor's registers; this
4380 function should ignore any position-dependent state in
4381 CUMULATIVE_ARGS and only use call-type dependent information. */
4382 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4384 /* Return true if the argument does get a co-processor register; it
4385 should set aapcs_reg to an RTX of the register allocated as is
4386 required for a return from FUNCTION_ARG. */
4387 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4389 /* Return true if a result of mode MODE (or type TYPE if MODE is
4390 BLKmode) is can be returned in this co-processor's registers. */
4391 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4393 /* Allocate and return an RTX element to hold the return type of a
4394 call, this routine must not fail and will only be called if
4395 is_return_candidate returned true with the same parameters. */
4396 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4398 /* Finish processing this argument and prepare to start processing
4400 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4401 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4409 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4414 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4415 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4422 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4424 /* We aren't passed a decl, so we can't check that a call is local.
4425 However, it isn't clear that that would be a win anyway, since it
4426 might limit some tail-calling opportunities. */
4427 enum arm_pcs pcs_variant;
4431 const_tree fndecl = NULL_TREE;
4433 if (TREE_CODE (fntype) == FUNCTION_DECL)
4436 fntype = TREE_TYPE (fntype);
4439 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4442 pcs_variant = arm_pcs_default;
4444 if (pcs_variant != ARM_PCS_AAPCS)
4448 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4449 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4458 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4461 /* We aren't passed a decl, so we can't check that a call is local.
4462 However, it isn't clear that that would be a win anyway, since it
4463 might limit some tail-calling opportunities. */
4464 enum arm_pcs pcs_variant;
4465 int unsignedp ATTRIBUTE_UNUSED;
4469 const_tree fndecl = NULL_TREE;
4471 if (TREE_CODE (fntype) == FUNCTION_DECL)
4474 fntype = TREE_TYPE (fntype);
4477 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4480 pcs_variant = arm_pcs_default;
4482 /* Promote integer types. */
4483 if (type && INTEGRAL_TYPE_P (type))
4484 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4486 if (pcs_variant != ARM_PCS_AAPCS)
4490 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4491 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4493 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4497 /* Promotes small structs returned in a register to full-word size
4498 for big-endian AAPCS. */
4499 if (type && arm_return_in_msb (type))
4501 HOST_WIDE_INT size = int_size_in_bytes (type);
4502 if (size % UNITS_PER_WORD != 0)
4504 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4505 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4509 return gen_rtx_REG (mode, R0_REGNUM);
4513 aapcs_libcall_value (enum machine_mode mode)
4515 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4516 && GET_MODE_SIZE (mode) <= 4)
4519 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4522 /* Lay out a function argument using the AAPCS rules. The rule
4523 numbers referred to here are those in the AAPCS. */
4525 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4526 const_tree type, bool named)
4531 /* We only need to do this once per argument. */
4532 if (pcum->aapcs_arg_processed)
4535 pcum->aapcs_arg_processed = true;
4537 /* Special case: if named is false then we are handling an incoming
4538 anonymous argument which is on the stack. */
4542 /* Is this a potential co-processor register candidate? */
4543 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4545 int slot = aapcs_select_call_coproc (pcum, mode, type);
4546 pcum->aapcs_cprc_slot = slot;
4548 /* We don't have to apply any of the rules from part B of the
4549 preparation phase, these are handled elsewhere in the
4554 /* A Co-processor register candidate goes either in its own
4555 class of registers or on the stack. */
4556 if (!pcum->aapcs_cprc_failed[slot])
4558 /* C1.cp - Try to allocate the argument to co-processor
4560 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4563 /* C2.cp - Put the argument on the stack and note that we
4564 can't assign any more candidates in this slot. We also
4565 need to note that we have allocated stack space, so that
4566 we won't later try to split a non-cprc candidate between
4567 core registers and the stack. */
4568 pcum->aapcs_cprc_failed[slot] = true;
4569 pcum->can_split = false;
4572 /* We didn't get a register, so this argument goes on the
4574 gcc_assert (pcum->can_split == false);
4579 /* C3 - For double-word aligned arguments, round the NCRN up to the
4580 next even number. */
4581 ncrn = pcum->aapcs_ncrn;
4582 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4585 nregs = ARM_NUM_REGS2(mode, type);
4587 /* Sigh, this test should really assert that nregs > 0, but a GCC
4588 extension allows empty structs and then gives them empty size; it
4589 then allows such a structure to be passed by value. For some of
4590 the code below we have to pretend that such an argument has
4591 non-zero size so that we 'locate' it correctly either in
4592 registers or on the stack. */
4593 gcc_assert (nregs >= 0);
4595 nregs2 = nregs ? nregs : 1;
4597 /* C4 - Argument fits entirely in core registers. */
4598 if (ncrn + nregs2 <= NUM_ARG_REGS)
4600 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4601 pcum->aapcs_next_ncrn = ncrn + nregs;
4605 /* C5 - Some core registers left and there are no arguments already
4606 on the stack: split this argument between the remaining core
4607 registers and the stack. */
4608 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4610 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4611 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4612 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4616 /* C6 - NCRN is set to 4. */
4617 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4619 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4623 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4624 for a call to a function whose data type is FNTYPE.
4625 For a library call, FNTYPE is NULL. */
4627 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4629 tree fndecl ATTRIBUTE_UNUSED)
4631 /* Long call handling. */
4633 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4635 pcum->pcs_variant = arm_pcs_default;
4637 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4639 if (arm_libcall_uses_aapcs_base (libname))
4640 pcum->pcs_variant = ARM_PCS_AAPCS;
4642 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4643 pcum->aapcs_reg = NULL_RTX;
4644 pcum->aapcs_partial = 0;
4645 pcum->aapcs_arg_processed = false;
4646 pcum->aapcs_cprc_slot = -1;
4647 pcum->can_split = true;
4649 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4653 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4655 pcum->aapcs_cprc_failed[i] = false;
4656 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4664 /* On the ARM, the offset starts at 0. */
4666 pcum->iwmmxt_nregs = 0;
4667 pcum->can_split = true;
4669 /* Varargs vectors are treated the same as long long.
4670 named_count avoids having to change the way arm handles 'named' */
4671 pcum->named_count = 0;
4674 if (TARGET_REALLY_IWMMXT && fntype)
4678 for (fn_arg = TYPE_ARG_TYPES (fntype);
4680 fn_arg = TREE_CHAIN (fn_arg))
4681 pcum->named_count += 1;
4683 if (! pcum->named_count)
4684 pcum->named_count = INT_MAX;
4689 /* Return true if mode/type need doubleword alignment. */
4691 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4693 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4694 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4698 /* Determine where to put an argument to a function.
4699 Value is zero to push the argument on the stack,
4700 or a hard register in which to store the argument.
4702 MODE is the argument's machine mode.
4703 TYPE is the data type of the argument (as a tree).
4704 This is null for libcalls where that information may
4706 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4707 the preceding args and about the function being called.
4708 NAMED is nonzero if this argument is a named parameter
4709 (otherwise it is an extra parameter matching an ellipsis).
4711 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4712 other arguments are passed on the stack. If (NAMED == 0) (which happens
4713 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4714 defined), say it is passed in the stack (function_prologue will
4715 indeed make it pass in the stack if necessary). */
4718 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4719 const_tree type, bool named)
4721 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4724 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4725 a call insn (op3 of a call_value insn). */
4726 if (mode == VOIDmode)
4729 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4731 aapcs_layout_arg (pcum, mode, type, named);
4732 return pcum->aapcs_reg;
4735 /* Varargs vectors are treated the same as long long.
4736 named_count avoids having to change the way arm handles 'named' */
4737 if (TARGET_IWMMXT_ABI
4738 && arm_vector_mode_supported_p (mode)
4739 && pcum->named_count > pcum->nargs + 1)
4741 if (pcum->iwmmxt_nregs <= 9)
4742 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4745 pcum->can_split = false;
4750 /* Put doubleword aligned quantities in even register pairs. */
4752 && ARM_DOUBLEWORD_ALIGN
4753 && arm_needs_doubleword_align (mode, type))
4756 /* Only allow splitting an arg between regs and memory if all preceding
4757 args were allocated to regs. For args passed by reference we only count
4758 the reference pointer. */
4759 if (pcum->can_split)
4762 nregs = ARM_NUM_REGS2 (mode, type);
4764 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4767 return gen_rtx_REG (mode, pcum->nregs);
4771 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4773 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4774 ? DOUBLEWORD_ALIGNMENT
4779 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4780 tree type, bool named)
4782 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4783 int nregs = pcum->nregs;
4785 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4787 aapcs_layout_arg (pcum, mode, type, named);
4788 return pcum->aapcs_partial;
4791 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4794 if (NUM_ARG_REGS > nregs
4795 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4797 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4802 /* Update the data in PCUM to advance over an argument
4803 of mode MODE and data type TYPE.
4804 (TYPE is null for libcalls where that information may not be available.) */
4807 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4808 const_tree type, bool named)
4810 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4812 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4814 aapcs_layout_arg (pcum, mode, type, named);
4816 if (pcum->aapcs_cprc_slot >= 0)
4818 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4820 pcum->aapcs_cprc_slot = -1;
4823 /* Generic stuff. */
4824 pcum->aapcs_arg_processed = false;
4825 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4826 pcum->aapcs_reg = NULL_RTX;
4827 pcum->aapcs_partial = 0;
4832 if (arm_vector_mode_supported_p (mode)
4833 && pcum->named_count > pcum->nargs
4834 && TARGET_IWMMXT_ABI)
4835 pcum->iwmmxt_nregs += 1;
4837 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4841 /* Variable sized types are passed by reference. This is a GCC
4842 extension to the ARM ABI. */
4845 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4846 enum machine_mode mode ATTRIBUTE_UNUSED,
4847 const_tree type, bool named ATTRIBUTE_UNUSED)
4849 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4852 /* Encode the current state of the #pragma [no_]long_calls. */
4855 OFF, /* No #pragma [no_]long_calls is in effect. */
4856 LONG, /* #pragma long_calls is in effect. */
4857 SHORT /* #pragma no_long_calls is in effect. */
4860 static arm_pragma_enum arm_pragma_long_calls = OFF;
4863 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4865 arm_pragma_long_calls = LONG;
4869 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4871 arm_pragma_long_calls = SHORT;
4875 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4877 arm_pragma_long_calls = OFF;
4880 /* Handle an attribute requiring a FUNCTION_DECL;
4881 arguments as in struct attribute_spec.handler. */
4883 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4884 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4886 if (TREE_CODE (*node) != FUNCTION_DECL)
4888 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4890 *no_add_attrs = true;
4896 /* Handle an "interrupt" or "isr" attribute;
4897 arguments as in struct attribute_spec.handler. */
4899 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4904 if (TREE_CODE (*node) != FUNCTION_DECL)
4906 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4908 *no_add_attrs = true;
4910 /* FIXME: the argument if any is checked for type attributes;
4911 should it be checked for decl ones? */
4915 if (TREE_CODE (*node) == FUNCTION_TYPE
4916 || TREE_CODE (*node) == METHOD_TYPE)
4918 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4920 warning (OPT_Wattributes, "%qE attribute ignored",
4922 *no_add_attrs = true;
4925 else if (TREE_CODE (*node) == POINTER_TYPE
4926 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4927 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4928 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4930 *node = build_variant_type_copy (*node);
4931 TREE_TYPE (*node) = build_type_attribute_variant
4933 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4934 *no_add_attrs = true;
4938 /* Possibly pass this attribute on from the type to a decl. */
4939 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4940 | (int) ATTR_FLAG_FUNCTION_NEXT
4941 | (int) ATTR_FLAG_ARRAY_NEXT))
4943 *no_add_attrs = true;
4944 return tree_cons (name, args, NULL_TREE);
4948 warning (OPT_Wattributes, "%qE attribute ignored",
4957 /* Handle a "pcs" attribute; arguments as in struct
4958 attribute_spec.handler. */
4960 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4961 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4963 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4965 warning (OPT_Wattributes, "%qE attribute ignored", name);
4966 *no_add_attrs = true;
4971 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4972 /* Handle the "notshared" attribute. This attribute is another way of
4973 requesting hidden visibility. ARM's compiler supports
4974 "__declspec(notshared)"; we support the same thing via an
4978 arm_handle_notshared_attribute (tree *node,
4979 tree name ATTRIBUTE_UNUSED,
4980 tree args ATTRIBUTE_UNUSED,
4981 int flags ATTRIBUTE_UNUSED,
4984 tree decl = TYPE_NAME (*node);
4988 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4989 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4990 *no_add_attrs = false;
4996 /* Return 0 if the attributes for two types are incompatible, 1 if they
4997 are compatible, and 2 if they are nearly compatible (which causes a
4998 warning to be generated). */
5000 arm_comp_type_attributes (const_tree type1, const_tree type2)
5004 /* Check for mismatch of non-default calling convention. */
5005 if (TREE_CODE (type1) != FUNCTION_TYPE)
5008 /* Check for mismatched call attributes. */
5009 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5010 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5011 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5012 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5014 /* Only bother to check if an attribute is defined. */
5015 if (l1 | l2 | s1 | s2)
5017 /* If one type has an attribute, the other must have the same attribute. */
5018 if ((l1 != l2) || (s1 != s2))
5021 /* Disallow mixed attributes. */
5022 if ((l1 & s2) || (l2 & s1))
5026 /* Check for mismatched ISR attribute. */
5027 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5029 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5030 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5032 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5039 /* Assigns default attributes to newly defined type. This is used to
5040 set short_call/long_call attributes for function types of
5041 functions defined inside corresponding #pragma scopes. */
5043 arm_set_default_type_attributes (tree type)
5045 /* Add __attribute__ ((long_call)) to all functions, when
5046 inside #pragma long_calls or __attribute__ ((short_call)),
5047 when inside #pragma no_long_calls. */
5048 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5050 tree type_attr_list, attr_name;
5051 type_attr_list = TYPE_ATTRIBUTES (type);
5053 if (arm_pragma_long_calls == LONG)
5054 attr_name = get_identifier ("long_call");
5055 else if (arm_pragma_long_calls == SHORT)
5056 attr_name = get_identifier ("short_call");
5060 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5061 TYPE_ATTRIBUTES (type) = type_attr_list;
5065 /* Return true if DECL is known to be linked into section SECTION. */
5068 arm_function_in_section_p (tree decl, section *section)
5070 /* We can only be certain about functions defined in the same
5071 compilation unit. */
5072 if (!TREE_STATIC (decl))
5075 /* Make sure that SYMBOL always binds to the definition in this
5076 compilation unit. */
5077 if (!targetm.binds_local_p (decl))
5080 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5081 if (!DECL_SECTION_NAME (decl))
5083 /* Make sure that we will not create a unique section for DECL. */
5084 if (flag_function_sections || DECL_ONE_ONLY (decl))
5088 return function_section (decl) == section;
5091 /* Return nonzero if a 32-bit "long_call" should be generated for
5092 a call from the current function to DECL. We generate a long_call
5095 a. has an __attribute__((long call))
5096 or b. is within the scope of a #pragma long_calls
5097 or c. the -mlong-calls command line switch has been specified
5099 However we do not generate a long call if the function:
5101 d. has an __attribute__ ((short_call))
5102 or e. is inside the scope of a #pragma no_long_calls
5103 or f. is defined in the same section as the current function. */
5106 arm_is_long_call_p (tree decl)
5111 return TARGET_LONG_CALLS;
5113 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5114 if (lookup_attribute ("short_call", attrs))
5117 /* For "f", be conservative, and only cater for cases in which the
5118 whole of the current function is placed in the same section. */
5119 if (!flag_reorder_blocks_and_partition
5120 && TREE_CODE (decl) == FUNCTION_DECL
5121 && arm_function_in_section_p (decl, current_function_section ()))
5124 if (lookup_attribute ("long_call", attrs))
5127 return TARGET_LONG_CALLS;
5130 /* Return nonzero if it is ok to make a tail-call to DECL. */
5132 arm_function_ok_for_sibcall (tree decl, tree exp)
5134 unsigned long func_type;
5136 if (cfun->machine->sibcall_blocked)
5139 /* Never tailcall something for which we have no decl, or if we
5140 are generating code for Thumb-1. */
5141 if (decl == NULL || TARGET_THUMB1)
5144 /* The PIC register is live on entry to VxWorks PLT entries, so we
5145 must make the call before restoring the PIC register. */
5146 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5149 /* Cannot tail-call to long calls, since these are out of range of
5150 a branch instruction. */
5151 if (arm_is_long_call_p (decl))
5154 /* If we are interworking and the function is not declared static
5155 then we can't tail-call it unless we know that it exists in this
5156 compilation unit (since it might be a Thumb routine). */
5157 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5160 func_type = arm_current_func_type ();
5161 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5162 if (IS_INTERRUPT (func_type))
5165 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5167 /* Check that the return value locations are the same. For
5168 example that we aren't returning a value from the sibling in
5169 a VFP register but then need to transfer it to a core
5173 a = arm_function_value (TREE_TYPE (exp), decl, false);
5174 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5176 if (!rtx_equal_p (a, b))
5180 /* Never tailcall if function may be called with a misaligned SP. */
5181 if (IS_STACKALIGN (func_type))
5184 /* Everything else is ok. */
5189 /* Addressing mode support functions. */
5191 /* Return nonzero if X is a legitimate immediate operand when compiling
5192 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5194 legitimate_pic_operand_p (rtx x)
5196 if (GET_CODE (x) == SYMBOL_REF
5197 || (GET_CODE (x) == CONST
5198 && GET_CODE (XEXP (x, 0)) == PLUS
5199 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5205 /* Record that the current function needs a PIC register. Initialize
5206 cfun->machine->pic_reg if we have not already done so. */
5209 require_pic_register (void)
5211 /* A lot of the logic here is made obscure by the fact that this
5212 routine gets called as part of the rtx cost estimation process.
5213 We don't want those calls to affect any assumptions about the real
5214 function; and further, we can't call entry_of_function() until we
5215 start the real expansion process. */
5216 if (!crtl->uses_pic_offset_table)
5218 gcc_assert (can_create_pseudo_p ());
5219 if (arm_pic_register != INVALID_REGNUM)
5221 if (!cfun->machine->pic_reg)
5222 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5224 /* Play games to avoid marking the function as needing pic
5225 if we are being called as part of the cost-estimation
5227 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5228 crtl->uses_pic_offset_table = 1;
5234 if (!cfun->machine->pic_reg)
5235 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5237 /* Play games to avoid marking the function as needing pic
5238 if we are being called as part of the cost-estimation
5240 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5242 crtl->uses_pic_offset_table = 1;
5245 arm_load_pic_register (0UL);
5250 for (insn = seq; insn; insn = NEXT_INSN (insn))
5252 INSN_LOCATOR (insn) = prologue_locator;
5254 /* We can be called during expansion of PHI nodes, where
5255 we can't yet emit instructions directly in the final
5256 insn stream. Queue the insns on the entry edge, they will
5257 be committed after everything else is expanded. */
5258 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5265 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5267 if (GET_CODE (orig) == SYMBOL_REF
5268 || GET_CODE (orig) == LABEL_REF)
5274 gcc_assert (can_create_pseudo_p ());
5275 reg = gen_reg_rtx (Pmode);
5278 /* VxWorks does not impose a fixed gap between segments; the run-time
5279 gap can be different from the object-file gap. We therefore can't
5280 use GOTOFF unless we are absolutely sure that the symbol is in the
5281 same segment as the GOT. Unfortunately, the flexibility of linker
5282 scripts means that we can't be sure of that in general, so assume
5283 that GOTOFF is never valid on VxWorks. */
5284 if ((GET_CODE (orig) == LABEL_REF
5285 || (GET_CODE (orig) == SYMBOL_REF &&
5286 SYMBOL_REF_LOCAL_P (orig)))
5288 && !TARGET_VXWORKS_RTP)
5289 insn = arm_pic_static_addr (orig, reg);
5295 /* If this function doesn't have a pic register, create one now. */
5296 require_pic_register ();
5298 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5300 /* Make the MEM as close to a constant as possible. */
5301 mem = SET_SRC (pat);
5302 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5303 MEM_READONLY_P (mem) = 1;
5304 MEM_NOTRAP_P (mem) = 1;
5306 insn = emit_insn (pat);
5309 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5311 set_unique_reg_note (insn, REG_EQUAL, orig);
5315 else if (GET_CODE (orig) == CONST)
5319 if (GET_CODE (XEXP (orig, 0)) == PLUS
5320 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5323 /* Handle the case where we have: const (UNSPEC_TLS). */
5324 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5325 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5328 /* Handle the case where we have:
5329 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5331 if (GET_CODE (XEXP (orig, 0)) == PLUS
5332 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5333 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5335 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5341 gcc_assert (can_create_pseudo_p ());
5342 reg = gen_reg_rtx (Pmode);
5345 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5347 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5348 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5349 base == reg ? 0 : reg);
5351 if (GET_CODE (offset) == CONST_INT)
5353 /* The base register doesn't really matter, we only want to
5354 test the index for the appropriate mode. */
5355 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5357 gcc_assert (can_create_pseudo_p ());
5358 offset = force_reg (Pmode, offset);
5361 if (GET_CODE (offset) == CONST_INT)
5362 return plus_constant (base, INTVAL (offset));
5365 if (GET_MODE_SIZE (mode) > 4
5366 && (GET_MODE_CLASS (mode) == MODE_INT
5367 || TARGET_SOFT_FLOAT))
5369 emit_insn (gen_addsi3 (reg, base, offset));
5373 return gen_rtx_PLUS (Pmode, base, offset);
5380 /* Find a spare register to use during the prolog of a function. */
5383 thumb_find_work_register (unsigned long pushed_regs_mask)
5387 /* Check the argument registers first as these are call-used. The
5388 register allocation order means that sometimes r3 might be used
5389 but earlier argument registers might not, so check them all. */
5390 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5391 if (!df_regs_ever_live_p (reg))
5394 /* Before going on to check the call-saved registers we can try a couple
5395 more ways of deducing that r3 is available. The first is when we are
5396 pushing anonymous arguments onto the stack and we have less than 4
5397 registers worth of fixed arguments(*). In this case r3 will be part of
5398 the variable argument list and so we can be sure that it will be
5399 pushed right at the start of the function. Hence it will be available
5400 for the rest of the prologue.
5401 (*): ie crtl->args.pretend_args_size is greater than 0. */
5402 if (cfun->machine->uses_anonymous_args
5403 && crtl->args.pretend_args_size > 0)
5404 return LAST_ARG_REGNUM;
5406 /* The other case is when we have fixed arguments but less than 4 registers
5407 worth. In this case r3 might be used in the body of the function, but
5408 it is not being used to convey an argument into the function. In theory
5409 we could just check crtl->args.size to see how many bytes are
5410 being passed in argument registers, but it seems that it is unreliable.
5411 Sometimes it will have the value 0 when in fact arguments are being
5412 passed. (See testcase execute/20021111-1.c for an example). So we also
5413 check the args_info.nregs field as well. The problem with this field is
5414 that it makes no allowances for arguments that are passed to the
5415 function but which are not used. Hence we could miss an opportunity
5416 when a function has an unused argument in r3. But it is better to be
5417 safe than to be sorry. */
5418 if (! cfun->machine->uses_anonymous_args
5419 && crtl->args.size >= 0
5420 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5421 && crtl->args.info.nregs < 4)
5422 return LAST_ARG_REGNUM;
5424 /* Otherwise look for a call-saved register that is going to be pushed. */
5425 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5426 if (pushed_regs_mask & (1 << reg))
5431 /* Thumb-2 can use high regs. */
5432 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5433 if (pushed_regs_mask & (1 << reg))
5436 /* Something went wrong - thumb_compute_save_reg_mask()
5437 should have arranged for a suitable register to be pushed. */
5441 static GTY(()) int pic_labelno;
5443 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5447 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5449 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5451 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5454 gcc_assert (flag_pic);
5456 pic_reg = cfun->machine->pic_reg;
5457 if (TARGET_VXWORKS_RTP)
5459 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5460 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5461 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5463 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5465 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5466 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5470 /* We use an UNSPEC rather than a LABEL_REF because this label
5471 never appears in the code stream. */
5473 labelno = GEN_INT (pic_labelno++);
5474 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5475 l1 = gen_rtx_CONST (VOIDmode, l1);
5477 /* On the ARM the PC register contains 'dot + 8' at the time of the
5478 addition, on the Thumb it is 'dot + 4'. */
5479 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5480 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5482 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5486 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5488 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5490 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5492 else /* TARGET_THUMB1 */
5494 if (arm_pic_register != INVALID_REGNUM
5495 && REGNO (pic_reg) > LAST_LO_REGNUM)
5497 /* We will have pushed the pic register, so we should always be
5498 able to find a work register. */
5499 pic_tmp = gen_rtx_REG (SImode,
5500 thumb_find_work_register (saved_regs));
5501 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5502 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5505 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5506 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5510 /* Need to emit this whether or not we obey regdecls,
5511 since setjmp/longjmp can cause life info to screw up. */
5515 /* Generate code to load the address of a static var when flag_pic is set. */
5517 arm_pic_static_addr (rtx orig, rtx reg)
5519 rtx l1, labelno, offset_rtx, insn;
5521 gcc_assert (flag_pic);
5523 /* We use an UNSPEC rather than a LABEL_REF because this label
5524 never appears in the code stream. */
5525 labelno = GEN_INT (pic_labelno++);
5526 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5527 l1 = gen_rtx_CONST (VOIDmode, l1);
5529 /* On the ARM the PC register contains 'dot + 8' at the time of the
5530 addition, on the Thumb it is 'dot + 4'. */
5531 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5532 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5533 UNSPEC_SYMBOL_OFFSET);
5534 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5538 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5540 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5542 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5544 else /* TARGET_THUMB1 */
5546 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5547 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5553 /* Return nonzero if X is valid as an ARM state addressing register. */
5555 arm_address_register_rtx_p (rtx x, int strict_p)
5559 if (GET_CODE (x) != REG)
5565 return ARM_REGNO_OK_FOR_BASE_P (regno);
5567 return (regno <= LAST_ARM_REGNUM
5568 || regno >= FIRST_PSEUDO_REGISTER
5569 || regno == FRAME_POINTER_REGNUM
5570 || regno == ARG_POINTER_REGNUM);
5573 /* Return TRUE if this rtx is the difference of a symbol and a label,
5574 and will reduce to a PC-relative relocation in the object file.
5575 Expressions like this can be left alone when generating PIC, rather
5576 than forced through the GOT. */
5578 pcrel_constant_p (rtx x)
5580 if (GET_CODE (x) == MINUS)
5581 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5586 /* Return true if X will surely end up in an index register after next
5589 will_be_in_index_register (const_rtx x)
5591 /* arm.md: calculate_pic_address will split this into a register. */
5592 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5595 /* Return nonzero if X is a valid ARM state address operand. */
5597 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5601 enum rtx_code code = GET_CODE (x);
5603 if (arm_address_register_rtx_p (x, strict_p))
5606 use_ldrd = (TARGET_LDRD
5608 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5610 if (code == POST_INC || code == PRE_DEC
5611 || ((code == PRE_INC || code == POST_DEC)
5612 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5613 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5615 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5616 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5617 && GET_CODE (XEXP (x, 1)) == PLUS
5618 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5620 rtx addend = XEXP (XEXP (x, 1), 1);
5622 /* Don't allow ldrd post increment by register because it's hard
5623 to fixup invalid register choices. */
5625 && GET_CODE (x) == POST_MODIFY
5626 && GET_CODE (addend) == REG)
5629 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5630 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5633 /* After reload constants split into minipools will have addresses
5634 from a LABEL_REF. */
5635 else if (reload_completed
5636 && (code == LABEL_REF
5638 && GET_CODE (XEXP (x, 0)) == PLUS
5639 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5640 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5643 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5646 else if (code == PLUS)
5648 rtx xop0 = XEXP (x, 0);
5649 rtx xop1 = XEXP (x, 1);
5651 return ((arm_address_register_rtx_p (xop0, strict_p)
5652 && ((GET_CODE(xop1) == CONST_INT
5653 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5654 || (!strict_p && will_be_in_index_register (xop1))))
5655 || (arm_address_register_rtx_p (xop1, strict_p)
5656 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5660 /* Reload currently can't handle MINUS, so disable this for now */
5661 else if (GET_CODE (x) == MINUS)
5663 rtx xop0 = XEXP (x, 0);
5664 rtx xop1 = XEXP (x, 1);
5666 return (arm_address_register_rtx_p (xop0, strict_p)
5667 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5671 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5672 && code == SYMBOL_REF
5673 && CONSTANT_POOL_ADDRESS_P (x)
5675 && symbol_mentioned_p (get_pool_constant (x))
5676 && ! pcrel_constant_p (get_pool_constant (x))))
5682 /* Return nonzero if X is a valid Thumb-2 address operand. */
5684 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5687 enum rtx_code code = GET_CODE (x);
5689 if (arm_address_register_rtx_p (x, strict_p))
5692 use_ldrd = (TARGET_LDRD
5694 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5696 if (code == POST_INC || code == PRE_DEC
5697 || ((code == PRE_INC || code == POST_DEC)
5698 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5699 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5701 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5702 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5703 && GET_CODE (XEXP (x, 1)) == PLUS
5704 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5706 /* Thumb-2 only has autoincrement by constant. */
5707 rtx addend = XEXP (XEXP (x, 1), 1);
5708 HOST_WIDE_INT offset;
5710 if (GET_CODE (addend) != CONST_INT)
5713 offset = INTVAL(addend);
5714 if (GET_MODE_SIZE (mode) <= 4)
5715 return (offset > -256 && offset < 256);
5717 return (use_ldrd && offset > -1024 && offset < 1024
5718 && (offset & 3) == 0);
5721 /* After reload constants split into minipools will have addresses
5722 from a LABEL_REF. */
5723 else if (reload_completed
5724 && (code == LABEL_REF
5726 && GET_CODE (XEXP (x, 0)) == PLUS
5727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5728 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5731 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5734 else if (code == PLUS)
5736 rtx xop0 = XEXP (x, 0);
5737 rtx xop1 = XEXP (x, 1);
5739 return ((arm_address_register_rtx_p (xop0, strict_p)
5740 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5741 || (!strict_p && will_be_in_index_register (xop1))))
5742 || (arm_address_register_rtx_p (xop1, strict_p)
5743 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5746 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5747 && code == SYMBOL_REF
5748 && CONSTANT_POOL_ADDRESS_P (x)
5750 && symbol_mentioned_p (get_pool_constant (x))
5751 && ! pcrel_constant_p (get_pool_constant (x))))
5757 /* Return nonzero if INDEX is valid for an address index operand in
5760 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5763 HOST_WIDE_INT range;
5764 enum rtx_code code = GET_CODE (index);
5766 /* Standard coprocessor addressing modes. */
5767 if (TARGET_HARD_FLOAT
5768 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5769 && (mode == SFmode || mode == DFmode
5770 || (TARGET_MAVERICK && mode == DImode)))
5771 return (code == CONST_INT && INTVAL (index) < 1024
5772 && INTVAL (index) > -1024
5773 && (INTVAL (index) & 3) == 0);
5775 /* For quad modes, we restrict the constant offset to be slightly less
5776 than what the instruction format permits. We do this because for
5777 quad mode moves, we will actually decompose them into two separate
5778 double-mode reads or writes. INDEX must therefore be a valid
5779 (double-mode) offset and so should INDEX+8. */
5780 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5781 return (code == CONST_INT
5782 && INTVAL (index) < 1016
5783 && INTVAL (index) > -1024
5784 && (INTVAL (index) & 3) == 0);
5786 /* We have no such constraint on double mode offsets, so we permit the
5787 full range of the instruction format. */
5788 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5789 return (code == CONST_INT
5790 && INTVAL (index) < 1024
5791 && INTVAL (index) > -1024
5792 && (INTVAL (index) & 3) == 0);
5794 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5795 return (code == CONST_INT
5796 && INTVAL (index) < 1024
5797 && INTVAL (index) > -1024
5798 && (INTVAL (index) & 3) == 0);
5800 if (arm_address_register_rtx_p (index, strict_p)
5801 && (GET_MODE_SIZE (mode) <= 4))
5804 if (mode == DImode || mode == DFmode)
5806 if (code == CONST_INT)
5808 HOST_WIDE_INT val = INTVAL (index);
5811 return val > -256 && val < 256;
5813 return val > -4096 && val < 4092;
5816 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5819 if (GET_MODE_SIZE (mode) <= 4
5823 || (mode == QImode && outer == SIGN_EXTEND))))
5827 rtx xiop0 = XEXP (index, 0);
5828 rtx xiop1 = XEXP (index, 1);
5830 return ((arm_address_register_rtx_p (xiop0, strict_p)
5831 && power_of_two_operand (xiop1, SImode))
5832 || (arm_address_register_rtx_p (xiop1, strict_p)
5833 && power_of_two_operand (xiop0, SImode)));
5835 else if (code == LSHIFTRT || code == ASHIFTRT
5836 || code == ASHIFT || code == ROTATERT)
5838 rtx op = XEXP (index, 1);
5840 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5841 && GET_CODE (op) == CONST_INT
5843 && INTVAL (op) <= 31);
5847 /* For ARM v4 we may be doing a sign-extend operation during the
5853 || (outer == SIGN_EXTEND && mode == QImode))
5859 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5861 return (code == CONST_INT
5862 && INTVAL (index) < range
5863 && INTVAL (index) > -range);
5866 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5867 index operand. i.e. 1, 2, 4 or 8. */
5869 thumb2_index_mul_operand (rtx op)
5873 if (GET_CODE(op) != CONST_INT)
5877 return (val == 1 || val == 2 || val == 4 || val == 8);
5880 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5882 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5884 enum rtx_code code = GET_CODE (index);
5886 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5887 /* Standard coprocessor addressing modes. */
5888 if (TARGET_HARD_FLOAT
5889 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5890 && (mode == SFmode || mode == DFmode
5891 || (TARGET_MAVERICK && mode == DImode)))
5892 return (code == CONST_INT && INTVAL (index) < 1024
5893 /* Thumb-2 allows only > -256 index range for it's core register
5894 load/stores. Since we allow SF/DF in core registers, we have
5895 to use the intersection between -256~4096 (core) and -1024~1024
5897 && INTVAL (index) > -256
5898 && (INTVAL (index) & 3) == 0);
5900 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5902 /* For DImode assume values will usually live in core regs
5903 and only allow LDRD addressing modes. */
5904 if (!TARGET_LDRD || mode != DImode)
5905 return (code == CONST_INT
5906 && INTVAL (index) < 1024
5907 && INTVAL (index) > -1024
5908 && (INTVAL (index) & 3) == 0);
5911 /* For quad modes, we restrict the constant offset to be slightly less
5912 than what the instruction format permits. We do this because for
5913 quad mode moves, we will actually decompose them into two separate
5914 double-mode reads or writes. INDEX must therefore be a valid
5915 (double-mode) offset and so should INDEX+8. */
5916 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5917 return (code == CONST_INT
5918 && INTVAL (index) < 1016
5919 && INTVAL (index) > -1024
5920 && (INTVAL (index) & 3) == 0);
5922 /* We have no such constraint on double mode offsets, so we permit the
5923 full range of the instruction format. */
5924 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5925 return (code == CONST_INT
5926 && INTVAL (index) < 1024
5927 && INTVAL (index) > -1024
5928 && (INTVAL (index) & 3) == 0);
5930 if (arm_address_register_rtx_p (index, strict_p)
5931 && (GET_MODE_SIZE (mode) <= 4))
5934 if (mode == DImode || mode == DFmode)
5936 if (code == CONST_INT)
5938 HOST_WIDE_INT val = INTVAL (index);
5939 /* ??? Can we assume ldrd for thumb2? */
5940 /* Thumb-2 ldrd only has reg+const addressing modes. */
5941 /* ldrd supports offsets of +-1020.
5942 However the ldr fallback does not. */
5943 return val > -256 && val < 256 && (val & 3) == 0;
5951 rtx xiop0 = XEXP (index, 0);
5952 rtx xiop1 = XEXP (index, 1);
5954 return ((arm_address_register_rtx_p (xiop0, strict_p)
5955 && thumb2_index_mul_operand (xiop1))
5956 || (arm_address_register_rtx_p (xiop1, strict_p)
5957 && thumb2_index_mul_operand (xiop0)));
5959 else if (code == ASHIFT)
5961 rtx op = XEXP (index, 1);
5963 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5964 && GET_CODE (op) == CONST_INT
5966 && INTVAL (op) <= 3);
5969 return (code == CONST_INT
5970 && INTVAL (index) < 4096
5971 && INTVAL (index) > -256);
5974 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5976 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5980 if (GET_CODE (x) != REG)
5986 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5988 return (regno <= LAST_LO_REGNUM
5989 || regno > LAST_VIRTUAL_REGISTER
5990 || regno == FRAME_POINTER_REGNUM
5991 || (GET_MODE_SIZE (mode) >= 4
5992 && (regno == STACK_POINTER_REGNUM
5993 || regno >= FIRST_PSEUDO_REGISTER
5994 || x == hard_frame_pointer_rtx
5995 || x == arg_pointer_rtx)));
5998 /* Return nonzero if x is a legitimate index register. This is the case
5999 for any base register that can access a QImode object. */
6001 thumb1_index_register_rtx_p (rtx x, int strict_p)
6003 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6006 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6008 The AP may be eliminated to either the SP or the FP, so we use the
6009 least common denominator, e.g. SImode, and offsets from 0 to 64.
6011 ??? Verify whether the above is the right approach.
6013 ??? Also, the FP may be eliminated to the SP, so perhaps that
6014 needs special handling also.
6016 ??? Look at how the mips16 port solves this problem. It probably uses
6017 better ways to solve some of these problems.
6019 Although it is not incorrect, we don't accept QImode and HImode
6020 addresses based on the frame pointer or arg pointer until the
6021 reload pass starts. This is so that eliminating such addresses
6022 into stack based ones won't produce impossible code. */
6024 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6026 /* ??? Not clear if this is right. Experiment. */
6027 if (GET_MODE_SIZE (mode) < 4
6028 && !(reload_in_progress || reload_completed)
6029 && (reg_mentioned_p (frame_pointer_rtx, x)
6030 || reg_mentioned_p (arg_pointer_rtx, x)
6031 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6032 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6033 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6034 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6037 /* Accept any base register. SP only in SImode or larger. */
6038 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6041 /* This is PC relative data before arm_reorg runs. */
6042 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6043 && GET_CODE (x) == SYMBOL_REF
6044 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6047 /* This is PC relative data after arm_reorg runs. */
6048 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6050 && (GET_CODE (x) == LABEL_REF
6051 || (GET_CODE (x) == CONST
6052 && GET_CODE (XEXP (x, 0)) == PLUS
6053 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6054 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6057 /* Post-inc indexing only supported for SImode and larger. */
6058 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6059 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6062 else if (GET_CODE (x) == PLUS)
6064 /* REG+REG address can be any two index registers. */
6065 /* We disallow FRAME+REG addressing since we know that FRAME
6066 will be replaced with STACK, and SP relative addressing only
6067 permits SP+OFFSET. */
6068 if (GET_MODE_SIZE (mode) <= 4
6069 && XEXP (x, 0) != frame_pointer_rtx
6070 && XEXP (x, 1) != frame_pointer_rtx
6071 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6072 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6073 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6076 /* REG+const has 5-7 bit offset for non-SP registers. */
6077 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6078 || XEXP (x, 0) == arg_pointer_rtx)
6079 && GET_CODE (XEXP (x, 1)) == CONST_INT
6080 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6083 /* REG+const has 10-bit offset for SP, but only SImode and
6084 larger is supported. */
6085 /* ??? Should probably check for DI/DFmode overflow here
6086 just like GO_IF_LEGITIMATE_OFFSET does. */
6087 else if (GET_CODE (XEXP (x, 0)) == REG
6088 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6089 && GET_MODE_SIZE (mode) >= 4
6090 && GET_CODE (XEXP (x, 1)) == CONST_INT
6091 && INTVAL (XEXP (x, 1)) >= 0
6092 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6093 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6096 else if (GET_CODE (XEXP (x, 0)) == REG
6097 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6098 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6099 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6100 && REGNO (XEXP (x, 0))
6101 <= LAST_VIRTUAL_POINTER_REGISTER))
6102 && GET_MODE_SIZE (mode) >= 4
6103 && GET_CODE (XEXP (x, 1)) == CONST_INT
6104 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6108 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6109 && GET_MODE_SIZE (mode) == 4
6110 && GET_CODE (x) == SYMBOL_REF
6111 && CONSTANT_POOL_ADDRESS_P (x)
6113 && symbol_mentioned_p (get_pool_constant (x))
6114 && ! pcrel_constant_p (get_pool_constant (x))))
6120 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6121 instruction of mode MODE. */
6123 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6125 switch (GET_MODE_SIZE (mode))
6128 return val >= 0 && val < 32;
6131 return val >= 0 && val < 64 && (val & 1) == 0;
6135 && (val + GET_MODE_SIZE (mode)) <= 128
6141 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6144 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6145 else if (TARGET_THUMB2)
6146 return thumb2_legitimate_address_p (mode, x, strict_p);
6147 else /* if (TARGET_THUMB1) */
6148 return thumb1_legitimate_address_p (mode, x, strict_p);
6151 /* Build the SYMBOL_REF for __tls_get_addr. */
6153 static GTY(()) rtx tls_get_addr_libfunc;
6156 get_tls_get_addr (void)
6158 if (!tls_get_addr_libfunc)
6159 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6160 return tls_get_addr_libfunc;
6164 arm_load_tp (rtx target)
6167 target = gen_reg_rtx (SImode);
6171 /* Can return in any reg. */
6172 emit_insn (gen_load_tp_hard (target));
6176 /* Always returned in r0. Immediately copy the result into a pseudo,
6177 otherwise other uses of r0 (e.g. setting up function arguments) may
6178 clobber the value. */
6182 emit_insn (gen_load_tp_soft ());
6184 tmp = gen_rtx_REG (SImode, 0);
6185 emit_move_insn (target, tmp);
6191 load_tls_operand (rtx x, rtx reg)
6195 if (reg == NULL_RTX)
6196 reg = gen_reg_rtx (SImode);
6198 tmp = gen_rtx_CONST (SImode, x);
6200 emit_move_insn (reg, tmp);
6206 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6208 rtx insns, label, labelno, sum;
6210 gcc_assert (reloc != TLS_DESCSEQ);
6213 labelno = GEN_INT (pic_labelno++);
6214 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6215 label = gen_rtx_CONST (VOIDmode, label);
6217 sum = gen_rtx_UNSPEC (Pmode,
6218 gen_rtvec (4, x, GEN_INT (reloc), label,
6219 GEN_INT (TARGET_ARM ? 8 : 4)),
6221 reg = load_tls_operand (sum, reg);
6224 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6226 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6228 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6229 LCT_PURE, /* LCT_CONST? */
6230 Pmode, 1, reg, Pmode);
6232 insns = get_insns ();
6239 arm_tls_descseq_addr (rtx x, rtx reg)
6241 rtx labelno = GEN_INT (pic_labelno++);
6242 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6243 rtx sum = gen_rtx_UNSPEC (Pmode,
6244 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6245 gen_rtx_CONST (VOIDmode, label),
6246 GEN_INT (!TARGET_ARM)),
6248 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6250 emit_insn (gen_tlscall (x, labelno));
6252 reg = gen_reg_rtx (SImode);
6254 gcc_assert (REGNO (reg) != 0);
6256 emit_move_insn (reg, reg0);
6262 legitimize_tls_address (rtx x, rtx reg)
6264 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6265 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6269 case TLS_MODEL_GLOBAL_DYNAMIC:
6270 if (TARGET_GNU2_TLS)
6272 reg = arm_tls_descseq_addr (x, reg);
6274 tp = arm_load_tp (NULL_RTX);
6276 dest = gen_rtx_PLUS (Pmode, tp, reg);
6280 /* Original scheme */
6281 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6282 dest = gen_reg_rtx (Pmode);
6283 emit_libcall_block (insns, dest, ret, x);
6287 case TLS_MODEL_LOCAL_DYNAMIC:
6288 if (TARGET_GNU2_TLS)
6290 reg = arm_tls_descseq_addr (x, reg);
6292 tp = arm_load_tp (NULL_RTX);
6294 dest = gen_rtx_PLUS (Pmode, tp, reg);
6298 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6300 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6301 share the LDM result with other LD model accesses. */
6302 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6304 dest = gen_reg_rtx (Pmode);
6305 emit_libcall_block (insns, dest, ret, eqv);
6307 /* Load the addend. */
6308 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6309 GEN_INT (TLS_LDO32)),
6311 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6312 dest = gen_rtx_PLUS (Pmode, dest, addend);
6316 case TLS_MODEL_INITIAL_EXEC:
6317 labelno = GEN_INT (pic_labelno++);
6318 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6319 label = gen_rtx_CONST (VOIDmode, label);
6320 sum = gen_rtx_UNSPEC (Pmode,
6321 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6322 GEN_INT (TARGET_ARM ? 8 : 4)),
6324 reg = load_tls_operand (sum, reg);
6327 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6328 else if (TARGET_THUMB2)
6329 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6332 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6333 emit_move_insn (reg, gen_const_mem (SImode, reg));
6336 tp = arm_load_tp (NULL_RTX);
6338 return gen_rtx_PLUS (Pmode, tp, reg);
6340 case TLS_MODEL_LOCAL_EXEC:
6341 tp = arm_load_tp (NULL_RTX);
6343 reg = gen_rtx_UNSPEC (Pmode,
6344 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6346 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6348 return gen_rtx_PLUS (Pmode, tp, reg);
6355 /* Try machine-dependent ways of modifying an illegitimate address
6356 to be legitimate. If we find one, return the new, valid address. */
6358 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6362 /* TODO: legitimize_address for Thumb2. */
6365 return thumb_legitimize_address (x, orig_x, mode);
6368 if (arm_tls_symbol_p (x))
6369 return legitimize_tls_address (x, NULL_RTX);
6371 if (GET_CODE (x) == PLUS)
6373 rtx xop0 = XEXP (x, 0);
6374 rtx xop1 = XEXP (x, 1);
6376 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6377 xop0 = force_reg (SImode, xop0);
6379 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6380 xop1 = force_reg (SImode, xop1);
6382 if (ARM_BASE_REGISTER_RTX_P (xop0)
6383 && GET_CODE (xop1) == CONST_INT)
6385 HOST_WIDE_INT n, low_n;
6389 /* VFP addressing modes actually allow greater offsets, but for
6390 now we just stick with the lowest common denominator. */
6392 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6404 low_n = ((mode) == TImode ? 0
6405 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6409 base_reg = gen_reg_rtx (SImode);
6410 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6411 emit_move_insn (base_reg, val);
6412 x = plus_constant (base_reg, low_n);
6414 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6415 x = gen_rtx_PLUS (SImode, xop0, xop1);
6418 /* XXX We don't allow MINUS any more -- see comment in
6419 arm_legitimate_address_outer_p (). */
6420 else if (GET_CODE (x) == MINUS)
6422 rtx xop0 = XEXP (x, 0);
6423 rtx xop1 = XEXP (x, 1);
6425 if (CONSTANT_P (xop0))
6426 xop0 = force_reg (SImode, xop0);
6428 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6429 xop1 = force_reg (SImode, xop1);
6431 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6432 x = gen_rtx_MINUS (SImode, xop0, xop1);
6435 /* Make sure to take full advantage of the pre-indexed addressing mode
6436 with absolute addresses which often allows for the base register to
6437 be factorized for multiple adjacent memory references, and it might
6438 even allows for the mini pool to be avoided entirely. */
6439 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6442 HOST_WIDE_INT mask, base, index;
6445 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6446 use a 8-bit index. So let's use a 12-bit index for SImode only and
6447 hope that arm_gen_constant will enable ldrb to use more bits. */
6448 bits = (mode == SImode) ? 12 : 8;
6449 mask = (1 << bits) - 1;
6450 base = INTVAL (x) & ~mask;
6451 index = INTVAL (x) & mask;
6452 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6454 /* It'll most probably be more efficient to generate the base
6455 with more bits set and use a negative index instead. */
6459 base_reg = force_reg (SImode, GEN_INT (base));
6460 x = plus_constant (base_reg, index);
6465 /* We need to find and carefully transform any SYMBOL and LABEL
6466 references; so go back to the original address expression. */
6467 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6469 if (new_x != orig_x)
6477 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6478 to be legitimate. If we find one, return the new, valid address. */
6480 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6482 if (arm_tls_symbol_p (x))
6483 return legitimize_tls_address (x, NULL_RTX);
6485 if (GET_CODE (x) == PLUS
6486 && GET_CODE (XEXP (x, 1)) == CONST_INT
6487 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6488 || INTVAL (XEXP (x, 1)) < 0))
6490 rtx xop0 = XEXP (x, 0);
6491 rtx xop1 = XEXP (x, 1);
6492 HOST_WIDE_INT offset = INTVAL (xop1);
6494 /* Try and fold the offset into a biasing of the base register and
6495 then offsetting that. Don't do this when optimizing for space
6496 since it can cause too many CSEs. */
6497 if (optimize_size && offset >= 0
6498 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6500 HOST_WIDE_INT delta;
6503 delta = offset - (256 - GET_MODE_SIZE (mode));
6504 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6505 delta = 31 * GET_MODE_SIZE (mode);
6507 delta = offset & (~31 * GET_MODE_SIZE (mode));
6509 xop0 = force_operand (plus_constant (xop0, offset - delta),
6511 x = plus_constant (xop0, delta);
6513 else if (offset < 0 && offset > -256)
6514 /* Small negative offsets are best done with a subtract before the
6515 dereference, forcing these into a register normally takes two
6517 x = force_operand (x, NULL_RTX);
6520 /* For the remaining cases, force the constant into a register. */
6521 xop1 = force_reg (SImode, xop1);
6522 x = gen_rtx_PLUS (SImode, xop0, xop1);
6525 else if (GET_CODE (x) == PLUS
6526 && s_register_operand (XEXP (x, 1), SImode)
6527 && !s_register_operand (XEXP (x, 0), SImode))
6529 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6531 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6536 /* We need to find and carefully transform any SYMBOL and LABEL
6537 references; so go back to the original address expression. */
6538 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6540 if (new_x != orig_x)
6548 arm_legitimize_reload_address (rtx *p,
6549 enum machine_mode mode,
6550 int opnum, int type,
6551 int ind_levels ATTRIBUTE_UNUSED)
6553 if (GET_CODE (*p) == PLUS
6554 && GET_CODE (XEXP (*p, 0)) == REG
6555 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6556 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6558 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6559 HOST_WIDE_INT low, high;
6561 /* Detect coprocessor load/stores. */
6562 bool coproc_p = ((TARGET_HARD_FLOAT
6563 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6564 && (mode == SFmode || mode == DFmode
6565 || (mode == DImode && TARGET_MAVERICK)))
6566 || (TARGET_REALLY_IWMMXT
6567 && VALID_IWMMXT_REG_MODE (mode))
6569 && (VALID_NEON_DREG_MODE (mode)
6570 || VALID_NEON_QREG_MODE (mode))));
6572 /* For some conditions, bail out when lower two bits are unaligned. */
6573 if ((val & 0x3) != 0
6574 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6576 /* For DI, and DF under soft-float: */
6577 || ((mode == DImode || mode == DFmode)
6578 /* Without ldrd, we use stm/ldm, which does not
6579 fair well with unaligned bits. */
6581 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6582 || TARGET_THUMB2))))
6585 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6586 of which the (reg+high) gets turned into a reload add insn,
6587 we try to decompose the index into high/low values that can often
6588 also lead to better reload CSE.
6590 ldr r0, [r2, #4100] // Offset too large
6591 ldr r1, [r2, #4104] // Offset too large
6593 is best reloaded as:
6599 which post-reload CSE can simplify in most cases to eliminate the
6600 second add instruction:
6605 The idea here is that we want to split out the bits of the constant
6606 as a mask, rather than as subtracting the maximum offset that the
6607 respective type of load/store used can handle.
6609 When encountering negative offsets, we can still utilize it even if
6610 the overall offset is positive; sometimes this may lead to an immediate
6611 that can be constructed with fewer instructions.
6613 ldr r0, [r2, #0x3FFFFC]
6615 This is best reloaded as:
6616 add t1, r2, #0x400000
6619 The trick for spotting this for a load insn with N bits of offset
6620 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
6621 negative offset that is going to make bit N and all the bits below
6622 it become zero in the remainder part.
6624 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6625 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6626 used in most cases of ARM load/store instructions. */
6628 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6629 (((VAL) & ((1 << (N)) - 1)) \
6630 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6635 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6637 /* NEON quad-word load/stores are made of two double-word accesses,
6638 so the valid index range is reduced by 8. Treat as 9-bit range if
6640 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6641 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6643 else if (GET_MODE_SIZE (mode) == 8)
6646 low = (TARGET_THUMB2
6647 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6648 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6650 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6651 to access doublewords. The supported load/store offsets are
6652 -8, -4, and 4, which we try to produce here. */
6653 low = ((val & 0xf) ^ 0x8) - 0x8;
6655 else if (GET_MODE_SIZE (mode) < 8)
6657 /* NEON element load/stores do not have an offset. */
6658 if (TARGET_NEON_FP16 && mode == HFmode)
6663 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6664 Try the wider 12-bit range first, and re-try if the result
6666 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6668 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6672 if (mode == HImode || mode == HFmode)
6675 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6678 /* The storehi/movhi_bytes fallbacks can use only
6679 [-4094,+4094] of the full ldrb/strb index range. */
6680 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6681 if (low == 4095 || low == -4095)
6686 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6692 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6693 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6694 - (unsigned HOST_WIDE_INT) 0x80000000);
6695 /* Check for overflow or zero */
6696 if (low == 0 || high == 0 || (high + low != val))
6699 /* Reload the high part into a base reg; leave the low part
6701 *p = gen_rtx_PLUS (GET_MODE (*p),
6702 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6705 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6706 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6707 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6715 thumb_legitimize_reload_address (rtx *x_p,
6716 enum machine_mode mode,
6717 int opnum, int type,
6718 int ind_levels ATTRIBUTE_UNUSED)
6722 if (GET_CODE (x) == PLUS
6723 && GET_MODE_SIZE (mode) < 4
6724 && REG_P (XEXP (x, 0))
6725 && XEXP (x, 0) == stack_pointer_rtx
6726 && GET_CODE (XEXP (x, 1)) == CONST_INT
6727 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6732 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6733 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6737 /* If both registers are hi-regs, then it's better to reload the
6738 entire expression rather than each register individually. That
6739 only requires one reload register rather than two. */
6740 if (GET_CODE (x) == PLUS
6741 && REG_P (XEXP (x, 0))
6742 && REG_P (XEXP (x, 1))
6743 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6744 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6749 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6750 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6757 /* Test for various thread-local symbols. */
6759 /* Return TRUE if X is a thread-local symbol. */
6762 arm_tls_symbol_p (rtx x)
6764 if (! TARGET_HAVE_TLS)
6767 if (GET_CODE (x) != SYMBOL_REF)
6770 return SYMBOL_REF_TLS_MODEL (x) != 0;
6773 /* Helper for arm_tls_referenced_p. */
6776 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6778 if (GET_CODE (*x) == SYMBOL_REF)
6779 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6781 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6782 TLS offsets, not real symbol references. */
6783 if (GET_CODE (*x) == UNSPEC
6784 && XINT (*x, 1) == UNSPEC_TLS)
6790 /* Return TRUE if X contains any TLS symbol references. */
6793 arm_tls_referenced_p (rtx x)
6795 if (! TARGET_HAVE_TLS)
6798 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6801 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6803 On the ARM, allow any integer (invalid ones are removed later by insn
6804 patterns), nice doubles and symbol_refs which refer to the function's
6807 When generating pic allow anything. */
6810 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6812 /* At present, we have no support for Neon structure constants, so forbid
6813 them here. It might be possible to handle simple cases like 0 and -1
6815 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6818 return flag_pic || !label_mentioned_p (x);
6822 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6824 return (GET_CODE (x) == CONST_INT
6825 || GET_CODE (x) == CONST_DOUBLE
6826 || CONSTANT_ADDRESS_P (x)
6831 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6833 return (!arm_cannot_force_const_mem (mode, x)
6835 ? arm_legitimate_constant_p_1 (mode, x)
6836 : thumb_legitimate_constant_p (mode, x)));
6839 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6842 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6846 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6848 split_const (x, &base, &offset);
6849 if (GET_CODE (base) == SYMBOL_REF
6850 && !offset_within_block_p (base, INTVAL (offset)))
6853 return arm_tls_referenced_p (x);
6856 #define REG_OR_SUBREG_REG(X) \
6857 (GET_CODE (X) == REG \
6858 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6860 #define REG_OR_SUBREG_RTX(X) \
6861 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6864 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6866 enum machine_mode mode = GET_MODE (x);
6880 return COSTS_N_INSNS (1);
6883 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6886 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6893 return COSTS_N_INSNS (2) + cycles;
6895 return COSTS_N_INSNS (1) + 16;
6898 return (COSTS_N_INSNS (1)
6899 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6900 + GET_CODE (SET_DEST (x)) == MEM));
6905 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6907 if (thumb_shiftable_const (INTVAL (x)))
6908 return COSTS_N_INSNS (2);
6909 return COSTS_N_INSNS (3);
6911 else if ((outer == PLUS || outer == COMPARE)
6912 && INTVAL (x) < 256 && INTVAL (x) > -256)
6914 else if ((outer == IOR || outer == XOR || outer == AND)
6915 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6916 return COSTS_N_INSNS (1);
6917 else if (outer == AND)
6920 /* This duplicates the tests in the andsi3 expander. */
6921 for (i = 9; i <= 31; i++)
6922 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6923 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6924 return COSTS_N_INSNS (2);
6926 else if (outer == ASHIFT || outer == ASHIFTRT
6927 || outer == LSHIFTRT)
6929 return COSTS_N_INSNS (2);
6935 return COSTS_N_INSNS (3);
6953 /* XXX another guess. */
6954 /* Memory costs quite a lot for the first word, but subsequent words
6955 load at the equivalent of a single insn each. */
6956 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6957 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6962 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6968 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6969 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6975 return total + COSTS_N_INSNS (1);
6977 /* Assume a two-shift sequence. Increase the cost slightly so
6978 we prefer actual shifts over an extend operation. */
6979 return total + 1 + COSTS_N_INSNS (2);
6987 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6989 enum machine_mode mode = GET_MODE (x);
6990 enum rtx_code subcode;
6992 enum rtx_code code = GET_CODE (x);
6998 /* Memory costs quite a lot for the first word, but subsequent words
6999 load at the equivalent of a single insn each. */
7000 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7007 if (TARGET_HARD_FLOAT && mode == SFmode)
7008 *total = COSTS_N_INSNS (2);
7009 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7010 *total = COSTS_N_INSNS (4);
7012 *total = COSTS_N_INSNS (20);
7016 if (GET_CODE (XEXP (x, 1)) == REG)
7017 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7018 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7019 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7025 *total += COSTS_N_INSNS (4);
7030 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7031 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7034 *total += COSTS_N_INSNS (3);
7038 *total += COSTS_N_INSNS (1);
7039 /* Increase the cost of complex shifts because they aren't any faster,
7040 and reduce dual issue opportunities. */
7041 if (arm_tune_cortex_a9
7042 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7050 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7051 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7052 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7054 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7058 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7059 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7061 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7068 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7070 if (TARGET_HARD_FLOAT
7072 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7074 *total = COSTS_N_INSNS (1);
7075 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7076 && arm_const_double_rtx (XEXP (x, 0)))
7078 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7082 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7083 && arm_const_double_rtx (XEXP (x, 1)))
7085 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7091 *total = COSTS_N_INSNS (20);
7095 *total = COSTS_N_INSNS (1);
7096 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7097 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7099 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7103 subcode = GET_CODE (XEXP (x, 1));
7104 if (subcode == ASHIFT || subcode == ASHIFTRT
7105 || subcode == LSHIFTRT
7106 || subcode == ROTATE || subcode == ROTATERT)
7108 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7109 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7113 /* A shift as a part of RSB costs no more than RSB itself. */
7114 if (GET_CODE (XEXP (x, 0)) == MULT
7115 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7117 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7118 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7123 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7125 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7126 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7130 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7131 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7133 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7134 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7135 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7136 *total += COSTS_N_INSNS (1);
7144 if (code == PLUS && arm_arch6 && mode == SImode
7145 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7146 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7148 *total = COSTS_N_INSNS (1);
7149 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7151 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7155 /* MLA: All arguments must be registers. We filter out
7156 multiplication by a power of two, so that we fall down into
7158 if (GET_CODE (XEXP (x, 0)) == MULT
7159 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7161 /* The cost comes from the cost of the multiply. */
7165 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7167 if (TARGET_HARD_FLOAT
7169 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7171 *total = COSTS_N_INSNS (1);
7172 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7173 && arm_const_double_rtx (XEXP (x, 1)))
7175 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7182 *total = COSTS_N_INSNS (20);
7186 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7187 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7189 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7190 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7191 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7192 *total += COSTS_N_INSNS (1);
7198 case AND: case XOR: case IOR:
7200 /* Normally the frame registers will be spilt into reg+const during
7201 reload, so it is a bad idea to combine them with other instructions,
7202 since then they might not be moved outside of loops. As a compromise
7203 we allow integration with ops that have a constant as their second
7205 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7206 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7207 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7208 *total = COSTS_N_INSNS (1);
7212 *total += COSTS_N_INSNS (2);
7213 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7214 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7223 *total += COSTS_N_INSNS (1);
7224 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7225 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7227 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7230 subcode = GET_CODE (XEXP (x, 0));
7231 if (subcode == ASHIFT || subcode == ASHIFTRT
7232 || subcode == LSHIFTRT
7233 || subcode == ROTATE || subcode == ROTATERT)
7235 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7236 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7241 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7243 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7244 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7248 if (subcode == UMIN || subcode == UMAX
7249 || subcode == SMIN || subcode == SMAX)
7251 *total = COSTS_N_INSNS (3);
7258 /* This should have been handled by the CPU specific routines. */
7262 if (arm_arch3m && mode == SImode
7263 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7264 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7265 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7266 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7267 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7268 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7270 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7273 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7277 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7279 if (TARGET_HARD_FLOAT
7281 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7283 *total = COSTS_N_INSNS (1);
7286 *total = COSTS_N_INSNS (2);
7292 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7293 if (mode == SImode && code == NOT)
7295 subcode = GET_CODE (XEXP (x, 0));
7296 if (subcode == ASHIFT || subcode == ASHIFTRT
7297 || subcode == LSHIFTRT
7298 || subcode == ROTATE || subcode == ROTATERT
7300 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7302 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7303 /* Register shifts cost an extra cycle. */
7304 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7305 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7314 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7316 *total = COSTS_N_INSNS (4);
7320 operand = XEXP (x, 0);
7322 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7323 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7324 && GET_CODE (XEXP (operand, 0)) == REG
7325 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7326 *total += COSTS_N_INSNS (1);
7327 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7328 + rtx_cost (XEXP (x, 2), code, 2, speed));
7332 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7334 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7340 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7341 && mode == SImode && XEXP (x, 1) == const0_rtx)
7343 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7349 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7350 && mode == SImode && XEXP (x, 1) == const0_rtx)
7352 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7372 /* SCC insns. In the case where the comparison has already been
7373 performed, then they cost 2 instructions. Otherwise they need
7374 an additional comparison before them. */
7375 *total = COSTS_N_INSNS (2);
7376 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7383 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7389 *total += COSTS_N_INSNS (1);
7390 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7391 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7393 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7397 subcode = GET_CODE (XEXP (x, 0));
7398 if (subcode == ASHIFT || subcode == ASHIFTRT
7399 || subcode == LSHIFTRT
7400 || subcode == ROTATE || subcode == ROTATERT)
7402 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7408 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7410 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7411 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7421 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7422 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7423 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7424 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7428 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7430 if (TARGET_HARD_FLOAT
7432 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7434 *total = COSTS_N_INSNS (1);
7437 *total = COSTS_N_INSNS (20);
7440 *total = COSTS_N_INSNS (1);
7442 *total += COSTS_N_INSNS (3);
7448 if (GET_MODE_CLASS (mode) == MODE_INT)
7450 rtx op = XEXP (x, 0);
7451 enum machine_mode opmode = GET_MODE (op);
7454 *total += COSTS_N_INSNS (1);
7456 if (opmode != SImode)
7460 /* If !arm_arch4, we use one of the extendhisi2_mem
7461 or movhi_bytes patterns for HImode. For a QImode
7462 sign extension, we first zero-extend from memory
7463 and then perform a shift sequence. */
7464 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7465 *total += COSTS_N_INSNS (2);
7468 *total += COSTS_N_INSNS (1);
7470 /* We don't have the necessary insn, so we need to perform some
7472 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7473 /* An and with constant 255. */
7474 *total += COSTS_N_INSNS (1);
7476 /* A shift sequence. Increase costs slightly to avoid
7477 combining two shifts into an extend operation. */
7478 *total += COSTS_N_INSNS (2) + 1;
7484 switch (GET_MODE (XEXP (x, 0)))
7491 *total = COSTS_N_INSNS (1);
7501 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7505 if (const_ok_for_arm (INTVAL (x))
7506 || const_ok_for_arm (~INTVAL (x)))
7507 *total = COSTS_N_INSNS (1);
7509 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7510 INTVAL (x), NULL_RTX,
7517 *total = COSTS_N_INSNS (3);
7521 *total = COSTS_N_INSNS (1);
7525 *total = COSTS_N_INSNS (1);
7526 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7530 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7531 && (mode == SFmode || !TARGET_VFP_SINGLE))
7532 *total = COSTS_N_INSNS (1);
7534 *total = COSTS_N_INSNS (4);
7541 *total = COSTS_N_INSNS (4);
7546 /* Estimates the size cost of thumb1 instructions.
7547 For now most of the code is copied from thumb1_rtx_costs. We need more
7548 fine grain tuning when we have more related test cases. */
7550 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7552 enum machine_mode mode = GET_MODE (x);
7565 return COSTS_N_INSNS (1);
7568 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7570 /* Thumb1 mul instruction can't operate on const. We must Load it
7571 into a register first. */
7572 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7573 return COSTS_N_INSNS (1) + const_size;
7575 return COSTS_N_INSNS (1);
7578 return (COSTS_N_INSNS (1)
7579 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7580 + GET_CODE (SET_DEST (x)) == MEM));
7585 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7586 return COSTS_N_INSNS (1);
7587 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7588 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7589 return COSTS_N_INSNS (2);
7590 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7591 if (thumb_shiftable_const (INTVAL (x)))
7592 return COSTS_N_INSNS (2);
7593 return COSTS_N_INSNS (3);
7595 else if ((outer == PLUS || outer == COMPARE)
7596 && INTVAL (x) < 256 && INTVAL (x) > -256)
7598 else if ((outer == IOR || outer == XOR || outer == AND)
7599 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7600 return COSTS_N_INSNS (1);
7601 else if (outer == AND)
7604 /* This duplicates the tests in the andsi3 expander. */
7605 for (i = 9; i <= 31; i++)
7606 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7607 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7608 return COSTS_N_INSNS (2);
7610 else if (outer == ASHIFT || outer == ASHIFTRT
7611 || outer == LSHIFTRT)
7613 return COSTS_N_INSNS (2);
7619 return COSTS_N_INSNS (3);
7637 /* XXX another guess. */
7638 /* Memory costs quite a lot for the first word, but subsequent words
7639 load at the equivalent of a single insn each. */
7640 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7641 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7646 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7651 /* XXX still guessing. */
7652 switch (GET_MODE (XEXP (x, 0)))
7655 return (1 + (mode == DImode ? 4 : 0)
7656 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7659 return (4 + (mode == DImode ? 4 : 0)
7660 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7663 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7674 /* RTX costs when optimizing for size. */
7676 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7679 enum machine_mode mode = GET_MODE (x);
7682 *total = thumb1_size_rtx_costs (x, code, outer_code);
7686 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7690 /* A memory access costs 1 insn if the mode is small, or the address is
7691 a single register, otherwise it costs one insn per word. */
7692 if (REG_P (XEXP (x, 0)))
7693 *total = COSTS_N_INSNS (1);
7695 && GET_CODE (XEXP (x, 0)) == PLUS
7696 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7697 /* This will be split into two instructions.
7698 See arm.md:calculate_pic_address. */
7699 *total = COSTS_N_INSNS (2);
7701 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7708 /* Needs a libcall, so it costs about this. */
7709 *total = COSTS_N_INSNS (2);
7713 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7715 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7723 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7725 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7728 else if (mode == SImode)
7730 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7731 /* Slightly disparage register shifts, but not by much. */
7732 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7733 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7737 /* Needs a libcall. */
7738 *total = COSTS_N_INSNS (2);
7742 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7743 && (mode == SFmode || !TARGET_VFP_SINGLE))
7745 *total = COSTS_N_INSNS (1);
7751 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7752 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7754 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7755 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7756 || subcode1 == ROTATE || subcode1 == ROTATERT
7757 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7758 || subcode1 == ASHIFTRT)
7760 /* It's just the cost of the two operands. */
7765 *total = COSTS_N_INSNS (1);
7769 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7773 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7774 && (mode == SFmode || !TARGET_VFP_SINGLE))
7776 *total = COSTS_N_INSNS (1);
7780 /* A shift as a part of ADD costs nothing. */
7781 if (GET_CODE (XEXP (x, 0)) == MULT
7782 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7784 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7785 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7786 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7791 case AND: case XOR: case IOR:
7794 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7796 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7797 || subcode == LSHIFTRT || subcode == ASHIFTRT
7798 || (code == AND && subcode == NOT))
7800 /* It's just the cost of the two operands. */
7806 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7810 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7814 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7815 && (mode == SFmode || !TARGET_VFP_SINGLE))
7817 *total = COSTS_N_INSNS (1);
7823 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7832 if (cc_register (XEXP (x, 0), VOIDmode))
7835 *total = COSTS_N_INSNS (1);
7839 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7840 && (mode == SFmode || !TARGET_VFP_SINGLE))
7841 *total = COSTS_N_INSNS (1);
7843 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7848 return arm_rtx_costs_1 (x, outer_code, total, 0);
7851 if (const_ok_for_arm (INTVAL (x)))
7852 /* A multiplication by a constant requires another instruction
7853 to load the constant to a register. */
7854 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7856 else if (const_ok_for_arm (~INTVAL (x)))
7857 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7858 else if (const_ok_for_arm (-INTVAL (x)))
7860 if (outer_code == COMPARE || outer_code == PLUS
7861 || outer_code == MINUS)
7864 *total = COSTS_N_INSNS (1);
7867 *total = COSTS_N_INSNS (2);
7873 *total = COSTS_N_INSNS (2);
7877 *total = COSTS_N_INSNS (4);
7882 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7883 cost of these slightly. */
7884 *total = COSTS_N_INSNS (1) + 1;
7891 if (mode != VOIDmode)
7892 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7894 *total = COSTS_N_INSNS (4); /* How knows? */
7899 /* RTX costs when optimizing for size. */
7901 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
7902 int *total, bool speed)
7905 return arm_size_rtx_costs (x, (enum rtx_code) code,
7906 (enum rtx_code) outer_code, total);
7908 return current_tune->rtx_costs (x, (enum rtx_code) code,
7909 (enum rtx_code) outer_code,
7913 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7914 supported on any "slowmul" cores, so it can be ignored. */
7917 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7918 int *total, bool speed)
7920 enum machine_mode mode = GET_MODE (x);
7924 *total = thumb1_rtx_costs (x, code, outer_code);
7931 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7934 *total = COSTS_N_INSNS (20);
7938 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7940 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7941 & (unsigned HOST_WIDE_INT) 0xffffffff);
7942 int cost, const_ok = const_ok_for_arm (i);
7943 int j, booth_unit_size;
7945 /* Tune as appropriate. */
7946 cost = const_ok ? 4 : 8;
7947 booth_unit_size = 2;
7948 for (j = 0; i && j < 32; j += booth_unit_size)
7950 i >>= booth_unit_size;
7954 *total = COSTS_N_INSNS (cost);
7955 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7959 *total = COSTS_N_INSNS (20);
7963 return arm_rtx_costs_1 (x, outer_code, total, speed);;
7968 /* RTX cost for cores with a fast multiply unit (M variants). */
7971 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7972 int *total, bool speed)
7974 enum machine_mode mode = GET_MODE (x);
7978 *total = thumb1_rtx_costs (x, code, outer_code);
7982 /* ??? should thumb2 use different costs? */
7986 /* There is no point basing this on the tuning, since it is always the
7987 fast variant if it exists at all. */
7989 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7990 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7991 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7993 *total = COSTS_N_INSNS(2);
8000 *total = COSTS_N_INSNS (5);
8004 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8006 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8007 & (unsigned HOST_WIDE_INT) 0xffffffff);
8008 int cost, const_ok = const_ok_for_arm (i);
8009 int j, booth_unit_size;
8011 /* Tune as appropriate. */
8012 cost = const_ok ? 4 : 8;
8013 booth_unit_size = 8;
8014 for (j = 0; i && j < 32; j += booth_unit_size)
8016 i >>= booth_unit_size;
8020 *total = COSTS_N_INSNS(cost);
8026 *total = COSTS_N_INSNS (4);
8030 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8032 if (TARGET_HARD_FLOAT
8034 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8036 *total = COSTS_N_INSNS (1);
8041 /* Requires a lib call */
8042 *total = COSTS_N_INSNS (20);
8046 return arm_rtx_costs_1 (x, outer_code, total, speed);
8051 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8052 so it can be ignored. */
8055 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8056 int *total, bool speed)
8058 enum machine_mode mode = GET_MODE (x);
8062 *total = thumb1_rtx_costs (x, code, outer_code);
8069 if (GET_CODE (XEXP (x, 0)) != MULT)
8070 return arm_rtx_costs_1 (x, outer_code, total, speed);
8072 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8073 will stall until the multiplication is complete. */
8074 *total = COSTS_N_INSNS (3);
8078 /* There is no point basing this on the tuning, since it is always the
8079 fast variant if it exists at all. */
8081 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8082 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8083 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8085 *total = COSTS_N_INSNS (2);
8092 *total = COSTS_N_INSNS (5);
8096 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8098 /* If operand 1 is a constant we can more accurately
8099 calculate the cost of the multiply. The multiplier can
8100 retire 15 bits on the first cycle and a further 12 on the
8101 second. We do, of course, have to load the constant into
8102 a register first. */
8103 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8104 /* There's a general overhead of one cycle. */
8106 unsigned HOST_WIDE_INT masked_const;
8111 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8113 masked_const = i & 0xffff8000;
8114 if (masked_const != 0)
8117 masked_const = i & 0xf8000000;
8118 if (masked_const != 0)
8121 *total = COSTS_N_INSNS (cost);
8127 *total = COSTS_N_INSNS (3);
8131 /* Requires a lib call */
8132 *total = COSTS_N_INSNS (20);
8136 return arm_rtx_costs_1 (x, outer_code, total, speed);
8141 /* RTX costs for 9e (and later) cores. */
8144 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8145 int *total, bool speed)
8147 enum machine_mode mode = GET_MODE (x);
8154 *total = COSTS_N_INSNS (3);
8158 *total = thumb1_rtx_costs (x, code, outer_code);
8166 /* There is no point basing this on the tuning, since it is always the
8167 fast variant if it exists at all. */
8169 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8170 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8171 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8173 *total = COSTS_N_INSNS (2);
8180 *total = COSTS_N_INSNS (5);
8186 *total = COSTS_N_INSNS (2);
8190 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8192 if (TARGET_HARD_FLOAT
8194 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8196 *total = COSTS_N_INSNS (1);
8201 *total = COSTS_N_INSNS (20);
8205 return arm_rtx_costs_1 (x, outer_code, total, speed);
8208 /* All address computations that can be done are free, but rtx cost returns
8209 the same for practically all of them. So we weight the different types
8210 of address here in the order (most pref first):
8211 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8213 arm_arm_address_cost (rtx x)
8215 enum rtx_code c = GET_CODE (x);
8217 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8219 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8224 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8227 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8237 arm_thumb_address_cost (rtx x)
8239 enum rtx_code c = GET_CODE (x);
8244 && GET_CODE (XEXP (x, 0)) == REG
8245 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8252 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8254 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8257 /* Adjust cost hook for XScale. */
8259 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8261 /* Some true dependencies can have a higher cost depending
8262 on precisely how certain input operands are used. */
8263 if (REG_NOTE_KIND(link) == 0
8264 && recog_memoized (insn) >= 0
8265 && recog_memoized (dep) >= 0)
8267 int shift_opnum = get_attr_shift (insn);
8268 enum attr_type attr_type = get_attr_type (dep);
8270 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8271 operand for INSN. If we have a shifted input operand and the
8272 instruction we depend on is another ALU instruction, then we may
8273 have to account for an additional stall. */
8274 if (shift_opnum != 0
8275 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8277 rtx shifted_operand;
8280 /* Get the shifted operand. */
8281 extract_insn (insn);
8282 shifted_operand = recog_data.operand[shift_opnum];
8284 /* Iterate over all the operands in DEP. If we write an operand
8285 that overlaps with SHIFTED_OPERAND, then we have increase the
8286 cost of this dependency. */
8288 preprocess_constraints ();
8289 for (opno = 0; opno < recog_data.n_operands; opno++)
8291 /* We can ignore strict inputs. */
8292 if (recog_data.operand_type[opno] == OP_IN)
8295 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8307 /* Adjust cost hook for Cortex A9. */
8309 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8311 switch (REG_NOTE_KIND (link))
8318 case REG_DEP_OUTPUT:
8319 if (recog_memoized (insn) >= 0
8320 && recog_memoized (dep) >= 0)
8322 if (GET_CODE (PATTERN (insn)) == SET)
8325 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8327 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8329 enum attr_type attr_type_insn = get_attr_type (insn);
8330 enum attr_type attr_type_dep = get_attr_type (dep);
8332 /* By default all dependencies of the form
8335 have an extra latency of 1 cycle because
8336 of the input and output dependency in this
8337 case. However this gets modeled as an true
8338 dependency and hence all these checks. */
8339 if (REG_P (SET_DEST (PATTERN (insn)))
8340 && REG_P (SET_DEST (PATTERN (dep)))
8341 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8342 SET_DEST (PATTERN (dep))))
8344 /* FMACS is a special case where the dependant
8345 instruction can be issued 3 cycles before
8346 the normal latency in case of an output
8348 if ((attr_type_insn == TYPE_FMACS
8349 || attr_type_insn == TYPE_FMACD)
8350 && (attr_type_dep == TYPE_FMACS
8351 || attr_type_dep == TYPE_FMACD))
8353 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8354 *cost = insn_default_latency (dep) - 3;
8356 *cost = insn_default_latency (dep);
8361 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8362 *cost = insn_default_latency (dep) + 1;
8364 *cost = insn_default_latency (dep);
8380 /* Adjust cost hook for FA726TE. */
8382 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8384 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
8385 have penalty of 3. */
8386 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8387 && recog_memoized (insn) >= 0
8388 && recog_memoized (dep) >= 0
8389 && get_attr_conds (dep) == CONDS_SET)
8391 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8392 if (get_attr_conds (insn) == CONDS_USE
8393 && get_attr_type (insn) != TYPE_BRANCH)
8399 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8400 || get_attr_conds (insn) == CONDS_USE)
8410 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8411 It corrects the value of COST based on the relationship between
8412 INSN and DEP through the dependence LINK. It returns the new
8413 value. There is a per-core adjust_cost hook to adjust scheduler costs
8414 and the per-core hook can choose to completely override the generic
8415 adjust_cost function. Only put bits of code into arm_adjust_cost that
8416 are common across all cores. */
8418 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8422 /* When generating Thumb-1 code, we want to place flag-setting operations
8423 close to a conditional branch which depends on them, so that we can
8424 omit the comparison. */
8426 && REG_NOTE_KIND (link) == 0
8427 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8428 && recog_memoized (dep) >= 0
8429 && get_attr_conds (dep) == CONDS_SET)
8432 if (current_tune->sched_adjust_cost != NULL)
8434 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8438 /* XXX This is not strictly true for the FPA. */
8439 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8440 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8443 /* Call insns don't incur a stall, even if they follow a load. */
8444 if (REG_NOTE_KIND (link) == 0
8445 && GET_CODE (insn) == CALL_INSN)
8448 if ((i_pat = single_set (insn)) != NULL
8449 && GET_CODE (SET_SRC (i_pat)) == MEM
8450 && (d_pat = single_set (dep)) != NULL
8451 && GET_CODE (SET_DEST (d_pat)) == MEM)
8453 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8454 /* This is a load after a store, there is no conflict if the load reads
8455 from a cached area. Assume that loads from the stack, and from the
8456 constant pool are cached, and that others will miss. This is a
8459 if ((GET_CODE (src_mem) == SYMBOL_REF
8460 && CONSTANT_POOL_ADDRESS_P (src_mem))
8461 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8462 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8463 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8471 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8474 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8476 return (optimize > 0) ? 2 : 0;
8480 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8482 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8485 static int fp_consts_inited = 0;
8487 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8488 static const char * const strings_fp[8] =
8491 "4", "5", "0.5", "10"
8494 static REAL_VALUE_TYPE values_fp[8];
8497 init_fp_table (void)
8503 fp_consts_inited = 1;
8505 fp_consts_inited = 8;
8507 for (i = 0; i < fp_consts_inited; i++)
8509 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8514 /* Return TRUE if rtx X is a valid immediate FP constant. */
8516 arm_const_double_rtx (rtx x)
8521 if (!fp_consts_inited)
8524 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8525 if (REAL_VALUE_MINUS_ZERO (r))
8528 for (i = 0; i < fp_consts_inited; i++)
8529 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8535 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8537 neg_const_double_rtx_ok_for_fpa (rtx x)
8542 if (!fp_consts_inited)
8545 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8546 r = real_value_negate (&r);
8547 if (REAL_VALUE_MINUS_ZERO (r))
8550 for (i = 0; i < 8; i++)
8551 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8558 /* VFPv3 has a fairly wide range of representable immediates, formed from
8559 "quarter-precision" floating-point values. These can be evaluated using this
8560 formula (with ^ for exponentiation):
8564 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8565 16 <= n <= 31 and 0 <= r <= 7.
8567 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8569 - A (most-significant) is the sign bit.
8570 - BCD are the exponent (encoded as r XOR 3).
8571 - EFGH are the mantissa (encoded as n - 16).
8574 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8575 fconst[sd] instruction, or -1 if X isn't suitable. */
8577 vfp3_const_double_index (rtx x)
8579 REAL_VALUE_TYPE r, m;
8581 unsigned HOST_WIDE_INT mantissa, mant_hi;
8582 unsigned HOST_WIDE_INT mask;
8583 HOST_WIDE_INT m1, m2;
8584 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8586 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8589 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8591 /* We can't represent these things, so detect them first. */
8592 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8595 /* Extract sign, exponent and mantissa. */
8596 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8597 r = real_value_abs (&r);
8598 exponent = REAL_EXP (&r);
8599 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8600 highest (sign) bit, with a fixed binary point at bit point_pos.
8601 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8602 bits for the mantissa, this may fail (low bits would be lost). */
8603 real_ldexp (&m, &r, point_pos - exponent);
8604 REAL_VALUE_TO_INT (&m1, &m2, m);
8608 /* If there are bits set in the low part of the mantissa, we can't
8609 represent this value. */
8613 /* Now make it so that mantissa contains the most-significant bits, and move
8614 the point_pos to indicate that the least-significant bits have been
8616 point_pos -= HOST_BITS_PER_WIDE_INT;
8619 /* We can permit four significant bits of mantissa only, plus a high bit
8620 which is always 1. */
8621 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8622 if ((mantissa & mask) != 0)
8625 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8626 mantissa >>= point_pos - 5;
8628 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8629 floating-point immediate zero with Neon using an integer-zero load, but
8630 that case is handled elsewhere.) */
8634 gcc_assert (mantissa >= 16 && mantissa <= 31);
8636 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8637 normalized significands are in the range [1, 2). (Our mantissa is shifted
8638 left 4 places at this point relative to normalized IEEE754 values). GCC
8639 internally uses [0.5, 1) (see real.c), so the exponent returned from
8640 REAL_EXP must be altered. */
8641 exponent = 5 - exponent;
8643 if (exponent < 0 || exponent > 7)
8646 /* Sign, mantissa and exponent are now in the correct form to plug into the
8647 formula described in the comment above. */
8648 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8651 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8653 vfp3_const_double_rtx (rtx x)
8658 return vfp3_const_double_index (x) != -1;
8661 /* Recognize immediates which can be used in various Neon instructions. Legal
8662 immediates are described by the following table (for VMVN variants, the
8663 bitwise inverse of the constant shown is recognized. In either case, VMOV
8664 is output and the correct instruction to use for a given constant is chosen
8665 by the assembler). The constant shown is replicated across all elements of
8666 the destination vector.
8668 insn elems variant constant (binary)
8669 ---- ----- ------- -----------------
8670 vmov i32 0 00000000 00000000 00000000 abcdefgh
8671 vmov i32 1 00000000 00000000 abcdefgh 00000000
8672 vmov i32 2 00000000 abcdefgh 00000000 00000000
8673 vmov i32 3 abcdefgh 00000000 00000000 00000000
8674 vmov i16 4 00000000 abcdefgh
8675 vmov i16 5 abcdefgh 00000000
8676 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8677 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8678 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8679 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8680 vmvn i16 10 00000000 abcdefgh
8681 vmvn i16 11 abcdefgh 00000000
8682 vmov i32 12 00000000 00000000 abcdefgh 11111111
8683 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8684 vmov i32 14 00000000 abcdefgh 11111111 11111111
8685 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8687 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8688 eeeeeeee ffffffff gggggggg hhhhhhhh
8689 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8691 For case 18, B = !b. Representable values are exactly those accepted by
8692 vfp3_const_double_index, but are output as floating-point numbers rather
8695 Variants 0-5 (inclusive) may also be used as immediates for the second
8696 operand of VORR/VBIC instructions.
8698 The INVERSE argument causes the bitwise inverse of the given operand to be
8699 recognized instead (used for recognizing legal immediates for the VAND/VORN
8700 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8701 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8702 output, rather than the real insns vbic/vorr).
8704 INVERSE makes no difference to the recognition of float vectors.
8706 The return value is the variant of immediate as shown in the above table, or
8707 -1 if the given value doesn't match any of the listed patterns.
8710 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8711 rtx *modconst, int *elementwidth)
8713 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8715 for (i = 0; i < idx; i += (STRIDE)) \
8720 immtype = (CLASS); \
8721 elsize = (ELSIZE); \
8725 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8726 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8727 unsigned char bytes[16];
8728 int immtype = -1, matches;
8729 unsigned int invmask = inverse ? 0xff : 0;
8731 /* Vectors of float constants. */
8732 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8734 rtx el0 = CONST_VECTOR_ELT (op, 0);
8737 if (!vfp3_const_double_rtx (el0))
8740 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8742 for (i = 1; i < n_elts; i++)
8744 rtx elt = CONST_VECTOR_ELT (op, i);
8747 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8749 if (!REAL_VALUES_EQUAL (r0, re))
8754 *modconst = CONST_VECTOR_ELT (op, 0);
8762 /* Splat vector constant out into a byte vector. */
8763 for (i = 0; i < n_elts; i++)
8765 rtx el = CONST_VECTOR_ELT (op, i);
8766 unsigned HOST_WIDE_INT elpart;
8767 unsigned int part, parts;
8769 if (GET_CODE (el) == CONST_INT)
8771 elpart = INTVAL (el);
8774 else if (GET_CODE (el) == CONST_DOUBLE)
8776 elpart = CONST_DOUBLE_LOW (el);
8782 for (part = 0; part < parts; part++)
8785 for (byte = 0; byte < innersize; byte++)
8787 bytes[idx++] = (elpart & 0xff) ^ invmask;
8788 elpart >>= BITS_PER_UNIT;
8790 if (GET_CODE (el) == CONST_DOUBLE)
8791 elpart = CONST_DOUBLE_HIGH (el);
8796 gcc_assert (idx == GET_MODE_SIZE (mode));
8800 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8801 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8803 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8804 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8806 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8807 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8809 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8810 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8812 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8814 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8816 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8817 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8819 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8820 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8822 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8823 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8825 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8826 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8828 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8830 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8832 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8833 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8835 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8836 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8838 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8839 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8841 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8842 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8844 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8846 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8847 && bytes[i] == bytes[(i + 8) % idx]);
8855 *elementwidth = elsize;
8859 unsigned HOST_WIDE_INT imm = 0;
8861 /* Un-invert bytes of recognized vector, if necessary. */
8863 for (i = 0; i < idx; i++)
8864 bytes[i] ^= invmask;
8868 /* FIXME: Broken on 32-bit H_W_I hosts. */
8869 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8871 for (i = 0; i < 8; i++)
8872 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8873 << (i * BITS_PER_UNIT);
8875 *modconst = GEN_INT (imm);
8879 unsigned HOST_WIDE_INT imm = 0;
8881 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8882 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8884 *modconst = GEN_INT (imm);
8892 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8893 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8894 float elements), and a modified constant (whatever should be output for a
8895 VMOV) in *MODCONST. */
8898 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8899 rtx *modconst, int *elementwidth)
8903 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8909 *modconst = tmpconst;
8912 *elementwidth = tmpwidth;
8917 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8918 the immediate is valid, write a constant suitable for using as an operand
8919 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8920 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8923 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8924 rtx *modconst, int *elementwidth)
8928 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8930 if (retval < 0 || retval > 5)
8934 *modconst = tmpconst;
8937 *elementwidth = tmpwidth;
8942 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
8943 the immediate is valid, write a constant suitable for using as an operand
8944 to VSHR/VSHL to *MODCONST and the corresponding element width to
8945 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
8946 because they have different limitations. */
8949 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
8950 rtx *modconst, int *elementwidth,
8953 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8954 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
8955 unsigned HOST_WIDE_INT last_elt = 0;
8956 unsigned HOST_WIDE_INT maxshift;
8958 /* Split vector constant out into a byte vector. */
8959 for (i = 0; i < n_elts; i++)
8961 rtx el = CONST_VECTOR_ELT (op, i);
8962 unsigned HOST_WIDE_INT elpart;
8964 if (GET_CODE (el) == CONST_INT)
8965 elpart = INTVAL (el);
8966 else if (GET_CODE (el) == CONST_DOUBLE)
8971 if (i != 0 && elpart != last_elt)
8977 /* Shift less than element size. */
8978 maxshift = innersize * 8;
8982 /* Left shift immediate value can be from 0 to <size>-1. */
8983 if (last_elt >= maxshift)
8988 /* Right shift immediate value can be from 1 to <size>. */
8989 if (last_elt == 0 || last_elt > maxshift)
8994 *elementwidth = innersize * 8;
8997 *modconst = CONST_VECTOR_ELT (op, 0);
9002 /* Return a string suitable for output of Neon immediate logic operation
9006 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9007 int inverse, int quad)
9009 int width, is_valid;
9010 static char templ[40];
9012 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9014 gcc_assert (is_valid != 0);
9017 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9019 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9024 /* Return a string suitable for output of Neon immediate shift operation
9025 (VSHR or VSHL) MNEM. */
9028 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9029 enum machine_mode mode, int quad,
9032 int width, is_valid;
9033 static char templ[40];
9035 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9036 gcc_assert (is_valid != 0);
9039 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9041 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9046 /* Output a sequence of pairwise operations to implement a reduction.
9047 NOTE: We do "too much work" here, because pairwise operations work on two
9048 registers-worth of operands in one go. Unfortunately we can't exploit those
9049 extra calculations to do the full operation in fewer steps, I don't think.
9050 Although all vector elements of the result but the first are ignored, we
9051 actually calculate the same result in each of the elements. An alternative
9052 such as initially loading a vector with zero to use as each of the second
9053 operands would use up an additional register and take an extra instruction,
9054 for no particular gain. */
9057 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9058 rtx (*reduc) (rtx, rtx, rtx))
9060 enum machine_mode inner = GET_MODE_INNER (mode);
9061 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9064 for (i = parts / 2; i >= 1; i /= 2)
9066 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9067 emit_insn (reduc (dest, tmpsum, tmpsum));
9072 /* If VALS is a vector constant that can be loaded into a register
9073 using VDUP, generate instructions to do so and return an RTX to
9074 assign to the register. Otherwise return NULL_RTX. */
9077 neon_vdup_constant (rtx vals)
9079 enum machine_mode mode = GET_MODE (vals);
9080 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9081 int n_elts = GET_MODE_NUNITS (mode);
9082 bool all_same = true;
9086 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9089 for (i = 0; i < n_elts; ++i)
9091 x = XVECEXP (vals, 0, i);
9092 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9097 /* The elements are not all the same. We could handle repeating
9098 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9099 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9103 /* We can load this constant by using VDUP and a constant in a
9104 single ARM register. This will be cheaper than a vector
9107 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9108 return gen_rtx_VEC_DUPLICATE (mode, x);
9111 /* Generate code to load VALS, which is a PARALLEL containing only
9112 constants (for vec_init) or CONST_VECTOR, efficiently into a
9113 register. Returns an RTX to copy into the register, or NULL_RTX
9114 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9117 neon_make_constant (rtx vals)
9119 enum machine_mode mode = GET_MODE (vals);
9121 rtx const_vec = NULL_RTX;
9122 int n_elts = GET_MODE_NUNITS (mode);
9126 if (GET_CODE (vals) == CONST_VECTOR)
9128 else if (GET_CODE (vals) == PARALLEL)
9130 /* A CONST_VECTOR must contain only CONST_INTs and
9131 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9132 Only store valid constants in a CONST_VECTOR. */
9133 for (i = 0; i < n_elts; ++i)
9135 rtx x = XVECEXP (vals, 0, i);
9136 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9139 if (n_const == n_elts)
9140 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9145 if (const_vec != NULL
9146 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9147 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9149 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9150 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9151 pipeline cycle; creating the constant takes one or two ARM
9154 else if (const_vec != NULL_RTX)
9155 /* Load from constant pool. On Cortex-A8 this takes two cycles
9156 (for either double or quad vectors). We can not take advantage
9157 of single-cycle VLD1 because we need a PC-relative addressing
9161 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9162 We can not construct an initializer. */
9166 /* Initialize vector TARGET to VALS. */
9169 neon_expand_vector_init (rtx target, rtx vals)
9171 enum machine_mode mode = GET_MODE (target);
9172 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9173 int n_elts = GET_MODE_NUNITS (mode);
9174 int n_var = 0, one_var = -1;
9175 bool all_same = true;
9179 for (i = 0; i < n_elts; ++i)
9181 x = XVECEXP (vals, 0, i);
9182 if (!CONSTANT_P (x))
9183 ++n_var, one_var = i;
9185 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9191 rtx constant = neon_make_constant (vals);
9192 if (constant != NULL_RTX)
9194 emit_move_insn (target, constant);
9199 /* Splat a single non-constant element if we can. */
9200 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9202 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9203 emit_insn (gen_rtx_SET (VOIDmode, target,
9204 gen_rtx_VEC_DUPLICATE (mode, x)));
9208 /* One field is non-constant. Load constant then overwrite varying
9209 field. This is more efficient than using the stack. */
9212 rtx copy = copy_rtx (vals);
9213 rtx index = GEN_INT (one_var);
9215 /* Load constant part of vector, substitute neighboring value for
9217 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9218 neon_expand_vector_init (target, copy);
9220 /* Insert variable. */
9221 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9225 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9228 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9231 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9234 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9237 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9240 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9243 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9246 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9249 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9257 /* Construct the vector in memory one field at a time
9258 and load the whole vector. */
9259 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9260 for (i = 0; i < n_elts; i++)
9261 emit_move_insn (adjust_address_nv (mem, inner_mode,
9262 i * GET_MODE_SIZE (inner_mode)),
9263 XVECEXP (vals, 0, i));
9264 emit_move_insn (target, mem);
9267 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9268 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9269 reported source locations are bogus. */
9272 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9277 gcc_assert (GET_CODE (operand) == CONST_INT);
9279 lane = INTVAL (operand);
9281 if (lane < low || lane >= high)
9285 /* Bounds-check lanes. */
9288 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9290 bounds_check (operand, low, high, "lane out of range");
9293 /* Bounds-check constants. */
9296 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9298 bounds_check (operand, low, high, "constant out of range");
9302 neon_element_bits (enum machine_mode mode)
9305 return GET_MODE_BITSIZE (mode);
9307 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9311 /* Predicates for `match_operand' and `match_operator'. */
9313 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9315 cirrus_memory_offset (rtx op)
9317 /* Reject eliminable registers. */
9318 if (! (reload_in_progress || reload_completed)
9319 && ( reg_mentioned_p (frame_pointer_rtx, op)
9320 || reg_mentioned_p (arg_pointer_rtx, op)
9321 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9322 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9323 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9324 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9327 if (GET_CODE (op) == MEM)
9333 /* Match: (mem (reg)). */
9334 if (GET_CODE (ind) == REG)
9340 if (GET_CODE (ind) == PLUS
9341 && GET_CODE (XEXP (ind, 0)) == REG
9342 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9343 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9350 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9351 WB is true if full writeback address modes are allowed and is false
9352 if limited writeback address modes (POST_INC and PRE_DEC) are
9356 arm_coproc_mem_operand (rtx op, bool wb)
9360 /* Reject eliminable registers. */
9361 if (! (reload_in_progress || reload_completed)
9362 && ( reg_mentioned_p (frame_pointer_rtx, op)
9363 || reg_mentioned_p (arg_pointer_rtx, op)
9364 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9365 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9366 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9367 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9370 /* Constants are converted into offsets from labels. */
9371 if (GET_CODE (op) != MEM)
9376 if (reload_completed
9377 && (GET_CODE (ind) == LABEL_REF
9378 || (GET_CODE (ind) == CONST
9379 && GET_CODE (XEXP (ind, 0)) == PLUS
9380 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9381 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9384 /* Match: (mem (reg)). */
9385 if (GET_CODE (ind) == REG)
9386 return arm_address_register_rtx_p (ind, 0);
9388 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
9389 acceptable in any case (subject to verification by
9390 arm_address_register_rtx_p). We need WB to be true to accept
9391 PRE_INC and POST_DEC. */
9392 if (GET_CODE (ind) == POST_INC
9393 || GET_CODE (ind) == PRE_DEC
9395 && (GET_CODE (ind) == PRE_INC
9396 || GET_CODE (ind) == POST_DEC)))
9397 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9400 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9401 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9402 && GET_CODE (XEXP (ind, 1)) == PLUS
9403 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9404 ind = XEXP (ind, 1);
9409 if (GET_CODE (ind) == PLUS
9410 && GET_CODE (XEXP (ind, 0)) == REG
9411 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9412 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9413 && INTVAL (XEXP (ind, 1)) > -1024
9414 && INTVAL (XEXP (ind, 1)) < 1024
9415 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9421 /* Return TRUE if OP is a memory operand which we can load or store a vector
9422 to/from. TYPE is one of the following values:
9423 0 - Vector load/stor (vldr)
9424 1 - Core registers (ldm)
9425 2 - Element/structure loads (vld1)
9428 neon_vector_mem_operand (rtx op, int type)
9432 /* Reject eliminable registers. */
9433 if (! (reload_in_progress || reload_completed)
9434 && ( reg_mentioned_p (frame_pointer_rtx, op)
9435 || reg_mentioned_p (arg_pointer_rtx, op)
9436 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9437 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9438 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9439 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9442 /* Constants are converted into offsets from labels. */
9443 if (GET_CODE (op) != MEM)
9448 if (reload_completed
9449 && (GET_CODE (ind) == LABEL_REF
9450 || (GET_CODE (ind) == CONST
9451 && GET_CODE (XEXP (ind, 0)) == PLUS
9452 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9453 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9456 /* Match: (mem (reg)). */
9457 if (GET_CODE (ind) == REG)
9458 return arm_address_register_rtx_p (ind, 0);
9460 /* Allow post-increment with Neon registers. */
9461 if ((type != 1 && GET_CODE (ind) == POST_INC)
9462 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9463 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9465 /* FIXME: vld1 allows register post-modify. */
9471 && GET_CODE (ind) == PLUS
9472 && GET_CODE (XEXP (ind, 0)) == REG
9473 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9474 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9475 && INTVAL (XEXP (ind, 1)) > -1024
9476 && INTVAL (XEXP (ind, 1)) < 1016
9477 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9483 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9486 neon_struct_mem_operand (rtx op)
9490 /* Reject eliminable registers. */
9491 if (! (reload_in_progress || reload_completed)
9492 && ( reg_mentioned_p (frame_pointer_rtx, op)
9493 || reg_mentioned_p (arg_pointer_rtx, op)
9494 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9495 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9496 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9497 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9500 /* Constants are converted into offsets from labels. */
9501 if (GET_CODE (op) != MEM)
9506 if (reload_completed
9507 && (GET_CODE (ind) == LABEL_REF
9508 || (GET_CODE (ind) == CONST
9509 && GET_CODE (XEXP (ind, 0)) == PLUS
9510 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9511 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9514 /* Match: (mem (reg)). */
9515 if (GET_CODE (ind) == REG)
9516 return arm_address_register_rtx_p (ind, 0);
9518 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9519 if (GET_CODE (ind) == POST_INC
9520 || GET_CODE (ind) == PRE_DEC)
9521 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9526 /* Return true if X is a register that will be eliminated later on. */
9528 arm_eliminable_register (rtx x)
9530 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9531 || REGNO (x) == ARG_POINTER_REGNUM
9532 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9533 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9536 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9537 coprocessor registers. Otherwise return NO_REGS. */
9540 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9544 if (!TARGET_NEON_FP16)
9545 return GENERAL_REGS;
9546 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9548 return GENERAL_REGS;
9551 /* The neon move patterns handle all legitimate vector and struct
9554 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9555 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9556 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9557 || VALID_NEON_STRUCT_MODE (mode)))
9560 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9563 return GENERAL_REGS;
9566 /* Values which must be returned in the most-significant end of the return
9570 arm_return_in_msb (const_tree valtype)
9572 return (TARGET_AAPCS_BASED
9574 && (AGGREGATE_TYPE_P (valtype)
9575 || TREE_CODE (valtype) == COMPLEX_TYPE
9576 || FIXED_POINT_TYPE_P (valtype)));
9579 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9580 Use by the Cirrus Maverick code which has to workaround
9581 a hardware bug triggered by such instructions. */
9583 arm_memory_load_p (rtx insn)
9585 rtx body, lhs, rhs;;
9587 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9590 body = PATTERN (insn);
9592 if (GET_CODE (body) != SET)
9595 lhs = XEXP (body, 0);
9596 rhs = XEXP (body, 1);
9598 lhs = REG_OR_SUBREG_RTX (lhs);
9600 /* If the destination is not a general purpose
9601 register we do not have to worry. */
9602 if (GET_CODE (lhs) != REG
9603 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9606 /* As well as loads from memory we also have to react
9607 to loads of invalid constants which will be turned
9608 into loads from the minipool. */
9609 return (GET_CODE (rhs) == MEM
9610 || GET_CODE (rhs) == SYMBOL_REF
9611 || note_invalid_constants (insn, -1, false));
9614 /* Return TRUE if INSN is a Cirrus instruction. */
9616 arm_cirrus_insn_p (rtx insn)
9618 enum attr_cirrus attr;
9620 /* get_attr cannot accept USE or CLOBBER. */
9622 || GET_CODE (insn) != INSN
9623 || GET_CODE (PATTERN (insn)) == USE
9624 || GET_CODE (PATTERN (insn)) == CLOBBER)
9627 attr = get_attr_cirrus (insn);
9629 return attr != CIRRUS_NOT;
9632 /* Cirrus reorg for invalid instruction combinations. */
9634 cirrus_reorg (rtx first)
9636 enum attr_cirrus attr;
9637 rtx body = PATTERN (first);
9641 /* Any branch must be followed by 2 non Cirrus instructions. */
9642 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9645 t = next_nonnote_insn (first);
9647 if (arm_cirrus_insn_p (t))
9650 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9654 emit_insn_after (gen_nop (), first);
9659 /* (float (blah)) is in parallel with a clobber. */
9660 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9661 body = XVECEXP (body, 0, 0);
9663 if (GET_CODE (body) == SET)
9665 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9667 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9668 be followed by a non Cirrus insn. */
9669 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9671 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9672 emit_insn_after (gen_nop (), first);
9676 else if (arm_memory_load_p (first))
9678 unsigned int arm_regno;
9680 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9681 ldr/cfmv64hr combination where the Rd field is the same
9682 in both instructions must be split with a non Cirrus
9689 /* Get Arm register number for ldr insn. */
9690 if (GET_CODE (lhs) == REG)
9691 arm_regno = REGNO (lhs);
9694 gcc_assert (GET_CODE (rhs) == REG);
9695 arm_regno = REGNO (rhs);
9699 first = next_nonnote_insn (first);
9701 if (! arm_cirrus_insn_p (first))
9704 body = PATTERN (first);
9706 /* (float (blah)) is in parallel with a clobber. */
9707 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9708 body = XVECEXP (body, 0, 0);
9710 if (GET_CODE (body) == FLOAT)
9711 body = XEXP (body, 0);
9713 if (get_attr_cirrus (first) == CIRRUS_MOVE
9714 && GET_CODE (XEXP (body, 1)) == REG
9715 && arm_regno == REGNO (XEXP (body, 1)))
9716 emit_insn_after (gen_nop (), first);
9722 /* get_attr cannot accept USE or CLOBBER. */
9724 || GET_CODE (first) != INSN
9725 || GET_CODE (PATTERN (first)) == USE
9726 || GET_CODE (PATTERN (first)) == CLOBBER)
9729 attr = get_attr_cirrus (first);
9731 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9732 must be followed by a non-coprocessor instruction. */
9733 if (attr == CIRRUS_COMPARE)
9737 t = next_nonnote_insn (first);
9739 if (arm_cirrus_insn_p (t))
9742 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9746 emit_insn_after (gen_nop (), first);
9752 /* Return TRUE if X references a SYMBOL_REF. */
9754 symbol_mentioned_p (rtx x)
9759 if (GET_CODE (x) == SYMBOL_REF)
9762 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9763 are constant offsets, not symbols. */
9764 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9767 fmt = GET_RTX_FORMAT (GET_CODE (x));
9769 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9775 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9776 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9779 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9786 /* Return TRUE if X references a LABEL_REF. */
9788 label_mentioned_p (rtx x)
9793 if (GET_CODE (x) == LABEL_REF)
9796 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9797 instruction, but they are constant offsets, not symbols. */
9798 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9801 fmt = GET_RTX_FORMAT (GET_CODE (x));
9802 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9808 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9809 if (label_mentioned_p (XVECEXP (x, i, j)))
9812 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9820 tls_mentioned_p (rtx x)
9822 switch (GET_CODE (x))
9825 return tls_mentioned_p (XEXP (x, 0));
9828 if (XINT (x, 1) == UNSPEC_TLS)
9836 /* Must not copy any rtx that uses a pc-relative address. */
9839 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
9841 if (GET_CODE (*x) == UNSPEC
9842 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9848 arm_cannot_copy_insn_p (rtx insn)
9850 /* The tls call insn cannot be copied, as it is paired with a data
9852 if (recog_memoized (insn) == CODE_FOR_tlscall)
9855 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9861 enum rtx_code code = GET_CODE (x);
9878 /* Return 1 if memory locations are adjacent. */
9880 adjacent_mem_locations (rtx a, rtx b)
9882 /* We don't guarantee to preserve the order of these memory refs. */
9883 if (volatile_refs_p (a) || volatile_refs_p (b))
9886 if ((GET_CODE (XEXP (a, 0)) == REG
9887 || (GET_CODE (XEXP (a, 0)) == PLUS
9888 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9889 && (GET_CODE (XEXP (b, 0)) == REG
9890 || (GET_CODE (XEXP (b, 0)) == PLUS
9891 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9893 HOST_WIDE_INT val0 = 0, val1 = 0;
9897 if (GET_CODE (XEXP (a, 0)) == PLUS)
9899 reg0 = XEXP (XEXP (a, 0), 0);
9900 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9905 if (GET_CODE (XEXP (b, 0)) == PLUS)
9907 reg1 = XEXP (XEXP (b, 0), 0);
9908 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9913 /* Don't accept any offset that will require multiple
9914 instructions to handle, since this would cause the
9915 arith_adjacentmem pattern to output an overlong sequence. */
9916 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9919 /* Don't allow an eliminable register: register elimination can make
9920 the offset too large. */
9921 if (arm_eliminable_register (reg0))
9924 val_diff = val1 - val0;
9928 /* If the target has load delay slots, then there's no benefit
9929 to using an ldm instruction unless the offset is zero and
9930 we are optimizing for size. */
9931 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9932 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9933 && (val_diff == 4 || val_diff == -4));
9936 return ((REGNO (reg0) == REGNO (reg1))
9937 && (val_diff == 4 || val_diff == -4));
9943 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9944 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9945 instruction. ADD_OFFSET is nonzero if the base address register needs
9946 to be modified with an add instruction before we can use it. */
9949 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9950 int nops, HOST_WIDE_INT add_offset)
9952 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9953 if the offset isn't small enough. The reason 2 ldrs are faster
9954 is because these ARMs are able to do more than one cache access
9955 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9956 whilst the ARM8 has a double bandwidth cache. This means that
9957 these cores can do both an instruction fetch and a data fetch in
9958 a single cycle, so the trick of calculating the address into a
9959 scratch register (one of the result regs) and then doing a load
9960 multiple actually becomes slower (and no smaller in code size).
9961 That is the transformation
9963 ldr rd1, [rbase + offset]
9964 ldr rd2, [rbase + offset + 4]
9968 add rd1, rbase, offset
9969 ldmia rd1, {rd1, rd2}
9971 produces worse code -- '3 cycles + any stalls on rd2' instead of
9972 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9973 access per cycle, the first sequence could never complete in less
9974 than 6 cycles, whereas the ldm sequence would only take 5 and
9975 would make better use of sequential accesses if not hitting the
9978 We cheat here and test 'arm_ld_sched' which we currently know to
9979 only be true for the ARM8, ARM9 and StrongARM. If this ever
9980 changes, then the test below needs to be reworked. */
9981 if (nops == 2 && arm_ld_sched && add_offset != 0)
9984 /* XScale has load-store double instructions, but they have stricter
9985 alignment requirements than load-store multiple, so we cannot
9988 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9989 the pipeline until completion.
9997 An ldr instruction takes 1-3 cycles, but does not block the
10006 Best case ldr will always win. However, the more ldr instructions
10007 we issue, the less likely we are to be able to schedule them well.
10008 Using ldr instructions also increases code size.
10010 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10011 for counts of 3 or 4 regs. */
10012 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10017 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10018 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10019 an array ORDER which describes the sequence to use when accessing the
10020 offsets that produces an ascending order. In this sequence, each
10021 offset must be larger by exactly 4 than the previous one. ORDER[0]
10022 must have been filled in with the lowest offset by the caller.
10023 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10024 we use to verify that ORDER produces an ascending order of registers.
10025 Return true if it was possible to construct such an order, false if
10029 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10030 int *unsorted_regs)
10033 for (i = 1; i < nops; i++)
10037 order[i] = order[i - 1];
10038 for (j = 0; j < nops; j++)
10039 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10041 /* We must find exactly one offset that is higher than the
10042 previous one by 4. */
10043 if (order[i] != order[i - 1])
10047 if (order[i] == order[i - 1])
10049 /* The register numbers must be ascending. */
10050 if (unsorted_regs != NULL
10051 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10057 /* Used to determine in a peephole whether a sequence of load
10058 instructions can be changed into a load-multiple instruction.
10059 NOPS is the number of separate load instructions we are examining. The
10060 first NOPS entries in OPERANDS are the destination registers, the
10061 next NOPS entries are memory operands. If this function is
10062 successful, *BASE is set to the common base register of the memory
10063 accesses; *LOAD_OFFSET is set to the first memory location's offset
10064 from that base register.
10065 REGS is an array filled in with the destination register numbers.
10066 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10067 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10068 the sequence of registers in REGS matches the loads from ascending memory
10069 locations, and the function verifies that the register numbers are
10070 themselves ascending. If CHECK_REGS is false, the register numbers
10071 are stored in the order they are found in the operands. */
10073 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10074 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10076 int unsorted_regs[MAX_LDM_STM_OPS];
10077 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10078 int order[MAX_LDM_STM_OPS];
10079 rtx base_reg_rtx = NULL;
10083 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10084 easily extended if required. */
10085 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10087 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10089 /* Loop over the operands and check that the memory references are
10090 suitable (i.e. immediate offsets from the same base register). At
10091 the same time, extract the target register, and the memory
10093 for (i = 0; i < nops; i++)
10098 /* Convert a subreg of a mem into the mem itself. */
10099 if (GET_CODE (operands[nops + i]) == SUBREG)
10100 operands[nops + i] = alter_subreg (operands + (nops + i));
10102 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10104 /* Don't reorder volatile memory references; it doesn't seem worth
10105 looking for the case where the order is ok anyway. */
10106 if (MEM_VOLATILE_P (operands[nops + i]))
10109 offset = const0_rtx;
10111 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10112 || (GET_CODE (reg) == SUBREG
10113 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10114 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10115 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10117 || (GET_CODE (reg) == SUBREG
10118 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10119 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10124 base_reg = REGNO (reg);
10125 base_reg_rtx = reg;
10126 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10129 else if (base_reg != (int) REGNO (reg))
10130 /* Not addressed from the same base register. */
10133 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10134 ? REGNO (operands[i])
10135 : REGNO (SUBREG_REG (operands[i])));
10137 /* If it isn't an integer register, or if it overwrites the
10138 base register but isn't the last insn in the list, then
10139 we can't do this. */
10140 if (unsorted_regs[i] < 0
10141 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10142 || unsorted_regs[i] > 14
10143 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10146 unsorted_offsets[i] = INTVAL (offset);
10147 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10151 /* Not a suitable memory address. */
10155 /* All the useful information has now been extracted from the
10156 operands into unsorted_regs and unsorted_offsets; additionally,
10157 order[0] has been set to the lowest offset in the list. Sort
10158 the offsets into order, verifying that they are adjacent, and
10159 check that the register numbers are ascending. */
10160 if (!compute_offset_order (nops, unsorted_offsets, order,
10161 check_regs ? unsorted_regs : NULL))
10165 memcpy (saved_order, order, sizeof order);
10171 for (i = 0; i < nops; i++)
10172 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10174 *load_offset = unsorted_offsets[order[0]];
10178 && !peep2_reg_dead_p (nops, base_reg_rtx))
10181 if (unsorted_offsets[order[0]] == 0)
10182 ldm_case = 1; /* ldmia */
10183 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10184 ldm_case = 2; /* ldmib */
10185 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10186 ldm_case = 3; /* ldmda */
10187 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10188 ldm_case = 4; /* ldmdb */
10189 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10190 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10195 if (!multiple_operation_profitable_p (false, nops,
10197 ? unsorted_offsets[order[0]] : 0))
10203 /* Used to determine in a peephole whether a sequence of store instructions can
10204 be changed into a store-multiple instruction.
10205 NOPS is the number of separate store instructions we are examining.
10206 NOPS_TOTAL is the total number of instructions recognized by the peephole
10208 The first NOPS entries in OPERANDS are the source registers, the next
10209 NOPS entries are memory operands. If this function is successful, *BASE is
10210 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10211 to the first memory location's offset from that base register. REGS is an
10212 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10213 likewise filled with the corresponding rtx's.
10214 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10215 numbers to an ascending order of stores.
10216 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10217 from ascending memory locations, and the function verifies that the register
10218 numbers are themselves ascending. If CHECK_REGS is false, the register
10219 numbers are stored in the order they are found in the operands. */
10221 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10222 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10223 HOST_WIDE_INT *load_offset, bool check_regs)
10225 int unsorted_regs[MAX_LDM_STM_OPS];
10226 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10227 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10228 int order[MAX_LDM_STM_OPS];
10230 rtx base_reg_rtx = NULL;
10233 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10234 easily extended if required. */
10235 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10237 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10239 /* Loop over the operands and check that the memory references are
10240 suitable (i.e. immediate offsets from the same base register). At
10241 the same time, extract the target register, and the memory
10243 for (i = 0; i < nops; i++)
10248 /* Convert a subreg of a mem into the mem itself. */
10249 if (GET_CODE (operands[nops + i]) == SUBREG)
10250 operands[nops + i] = alter_subreg (operands + (nops + i));
10252 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10254 /* Don't reorder volatile memory references; it doesn't seem worth
10255 looking for the case where the order is ok anyway. */
10256 if (MEM_VOLATILE_P (operands[nops + i]))
10259 offset = const0_rtx;
10261 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10262 || (GET_CODE (reg) == SUBREG
10263 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10264 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10265 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10267 || (GET_CODE (reg) == SUBREG
10268 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10269 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10272 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10273 ? operands[i] : SUBREG_REG (operands[i]));
10274 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10278 base_reg = REGNO (reg);
10279 base_reg_rtx = reg;
10280 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10283 else if (base_reg != (int) REGNO (reg))
10284 /* Not addressed from the same base register. */
10287 /* If it isn't an integer register, then we can't do this. */
10288 if (unsorted_regs[i] < 0
10289 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10290 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
10291 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10292 || unsorted_regs[i] > 14)
10295 unsorted_offsets[i] = INTVAL (offset);
10296 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10300 /* Not a suitable memory address. */
10304 /* All the useful information has now been extracted from the
10305 operands into unsorted_regs and unsorted_offsets; additionally,
10306 order[0] has been set to the lowest offset in the list. Sort
10307 the offsets into order, verifying that they are adjacent, and
10308 check that the register numbers are ascending. */
10309 if (!compute_offset_order (nops, unsorted_offsets, order,
10310 check_regs ? unsorted_regs : NULL))
10314 memcpy (saved_order, order, sizeof order);
10320 for (i = 0; i < nops; i++)
10322 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10324 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10327 *load_offset = unsorted_offsets[order[0]];
10331 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10334 if (unsorted_offsets[order[0]] == 0)
10335 stm_case = 1; /* stmia */
10336 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10337 stm_case = 2; /* stmib */
10338 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10339 stm_case = 3; /* stmda */
10340 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10341 stm_case = 4; /* stmdb */
10345 if (!multiple_operation_profitable_p (false, nops, 0))
10351 /* Routines for use in generating RTL. */
10353 /* Generate a load-multiple instruction. COUNT is the number of loads in
10354 the instruction; REGS and MEMS are arrays containing the operands.
10355 BASEREG is the base register to be used in addressing the memory operands.
10356 WBACK_OFFSET is nonzero if the instruction should update the base
10360 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10361 HOST_WIDE_INT wback_offset)
10366 if (!multiple_operation_profitable_p (false, count, 0))
10372 for (i = 0; i < count; i++)
10373 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10375 if (wback_offset != 0)
10376 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10378 seq = get_insns ();
10384 result = gen_rtx_PARALLEL (VOIDmode,
10385 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10386 if (wback_offset != 0)
10388 XVECEXP (result, 0, 0)
10389 = gen_rtx_SET (VOIDmode, basereg,
10390 plus_constant (basereg, wback_offset));
10395 for (j = 0; i < count; i++, j++)
10396 XVECEXP (result, 0, i)
10397 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10402 /* Generate a store-multiple instruction. COUNT is the number of stores in
10403 the instruction; REGS and MEMS are arrays containing the operands.
10404 BASEREG is the base register to be used in addressing the memory operands.
10405 WBACK_OFFSET is nonzero if the instruction should update the base
10409 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10410 HOST_WIDE_INT wback_offset)
10415 if (GET_CODE (basereg) == PLUS)
10416 basereg = XEXP (basereg, 0);
10418 if (!multiple_operation_profitable_p (false, count, 0))
10424 for (i = 0; i < count; i++)
10425 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10427 if (wback_offset != 0)
10428 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10430 seq = get_insns ();
10436 result = gen_rtx_PARALLEL (VOIDmode,
10437 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10438 if (wback_offset != 0)
10440 XVECEXP (result, 0, 0)
10441 = gen_rtx_SET (VOIDmode, basereg,
10442 plus_constant (basereg, wback_offset));
10447 for (j = 0; i < count; i++, j++)
10448 XVECEXP (result, 0, i)
10449 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10454 /* Generate either a load-multiple or a store-multiple instruction. This
10455 function can be used in situations where we can start with a single MEM
10456 rtx and adjust its address upwards.
10457 COUNT is the number of operations in the instruction, not counting a
10458 possible update of the base register. REGS is an array containing the
10460 BASEREG is the base register to be used in addressing the memory operands,
10461 which are constructed from BASEMEM.
10462 WRITE_BACK specifies whether the generated instruction should include an
10463 update of the base register.
10464 OFFSETP is used to pass an offset to and from this function; this offset
10465 is not used when constructing the address (instead BASEMEM should have an
10466 appropriate offset in its address), it is used only for setting
10467 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
10470 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10471 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10473 rtx mems[MAX_LDM_STM_OPS];
10474 HOST_WIDE_INT offset = *offsetp;
10477 gcc_assert (count <= MAX_LDM_STM_OPS);
10479 if (GET_CODE (basereg) == PLUS)
10480 basereg = XEXP (basereg, 0);
10482 for (i = 0; i < count; i++)
10484 rtx addr = plus_constant (basereg, i * 4);
10485 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10493 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10494 write_back ? 4 * count : 0);
10496 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10497 write_back ? 4 * count : 0);
10501 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10502 rtx basemem, HOST_WIDE_INT *offsetp)
10504 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10509 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10510 rtx basemem, HOST_WIDE_INT *offsetp)
10512 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10516 /* Called from a peephole2 expander to turn a sequence of loads into an
10517 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10518 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10519 is true if we can reorder the registers because they are used commutatively
10521 Returns true iff we could generate a new instruction. */
10524 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10526 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10527 rtx mems[MAX_LDM_STM_OPS];
10528 int i, j, base_reg;
10530 HOST_WIDE_INT offset;
10531 int write_back = FALSE;
10535 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10536 &base_reg, &offset, !sort_regs);
10542 for (i = 0; i < nops - 1; i++)
10543 for (j = i + 1; j < nops; j++)
10544 if (regs[i] > regs[j])
10550 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10554 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10555 gcc_assert (ldm_case == 1 || ldm_case == 5);
10561 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10562 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10564 if (!TARGET_THUMB1)
10566 base_reg = regs[0];
10567 base_reg_rtx = newbase;
10571 for (i = 0; i < nops; i++)
10573 addr = plus_constant (base_reg_rtx, offset + i * 4);
10574 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10577 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10578 write_back ? offset + i * 4 : 0));
10582 /* Called from a peephole2 expander to turn a sequence of stores into an
10583 STM instruction. OPERANDS are the operands found by the peephole matcher;
10584 NOPS indicates how many separate stores we are trying to combine.
10585 Returns true iff we could generate a new instruction. */
10588 gen_stm_seq (rtx *operands, int nops)
10591 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10592 rtx mems[MAX_LDM_STM_OPS];
10595 HOST_WIDE_INT offset;
10596 int write_back = FALSE;
10599 bool base_reg_dies;
10601 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10602 mem_order, &base_reg, &offset, true);
10607 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10609 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10612 gcc_assert (base_reg_dies);
10618 gcc_assert (base_reg_dies);
10619 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10623 addr = plus_constant (base_reg_rtx, offset);
10625 for (i = 0; i < nops; i++)
10627 addr = plus_constant (base_reg_rtx, offset + i * 4);
10628 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10631 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10632 write_back ? offset + i * 4 : 0));
10636 /* Called from a peephole2 expander to turn a sequence of stores that are
10637 preceded by constant loads into an STM instruction. OPERANDS are the
10638 operands found by the peephole matcher; NOPS indicates how many
10639 separate stores we are trying to combine; there are 2 * NOPS
10640 instructions in the peephole.
10641 Returns true iff we could generate a new instruction. */
10644 gen_const_stm_seq (rtx *operands, int nops)
10646 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10647 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10648 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10649 rtx mems[MAX_LDM_STM_OPS];
10652 HOST_WIDE_INT offset;
10653 int write_back = FALSE;
10656 bool base_reg_dies;
10658 HARD_REG_SET allocated;
10660 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10661 mem_order, &base_reg, &offset, false);
10666 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10668 /* If the same register is used more than once, try to find a free
10670 CLEAR_HARD_REG_SET (allocated);
10671 for (i = 0; i < nops; i++)
10673 for (j = i + 1; j < nops; j++)
10674 if (regs[i] == regs[j])
10676 rtx t = peep2_find_free_register (0, nops * 2,
10677 TARGET_THUMB1 ? "l" : "r",
10678 SImode, &allocated);
10682 regs[i] = REGNO (t);
10686 /* Compute an ordering that maps the register numbers to an ascending
10689 for (i = 0; i < nops; i++)
10690 if (regs[i] < regs[reg_order[0]])
10693 for (i = 1; i < nops; i++)
10695 int this_order = reg_order[i - 1];
10696 for (j = 0; j < nops; j++)
10697 if (regs[j] > regs[reg_order[i - 1]]
10698 && (this_order == reg_order[i - 1]
10699 || regs[j] < regs[this_order]))
10701 reg_order[i] = this_order;
10704 /* Ensure that registers that must be live after the instruction end
10705 up with the correct value. */
10706 for (i = 0; i < nops; i++)
10708 int this_order = reg_order[i];
10709 if ((this_order != mem_order[i]
10710 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10711 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10715 /* Load the constants. */
10716 for (i = 0; i < nops; i++)
10718 rtx op = operands[2 * nops + mem_order[i]];
10719 sorted_regs[i] = regs[reg_order[i]];
10720 emit_move_insn (reg_rtxs[reg_order[i]], op);
10723 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10725 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10728 gcc_assert (base_reg_dies);
10734 gcc_assert (base_reg_dies);
10735 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10739 addr = plus_constant (base_reg_rtx, offset);
10741 for (i = 0; i < nops; i++)
10743 addr = plus_constant (base_reg_rtx, offset + i * 4);
10744 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10747 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10748 write_back ? offset + i * 4 : 0));
10753 arm_gen_movmemqi (rtx *operands)
10755 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10756 HOST_WIDE_INT srcoffset, dstoffset;
10758 rtx src, dst, srcbase, dstbase;
10759 rtx part_bytes_reg = NULL;
10762 if (GET_CODE (operands[2]) != CONST_INT
10763 || GET_CODE (operands[3]) != CONST_INT
10764 || INTVAL (operands[2]) > 64
10765 || INTVAL (operands[3]) & 3)
10768 dstbase = operands[0];
10769 srcbase = operands[1];
10771 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10772 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10774 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10775 out_words_to_go = INTVAL (operands[2]) / 4;
10776 last_bytes = INTVAL (operands[2]) & 3;
10777 dstoffset = srcoffset = 0;
10779 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10780 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10782 for (i = 0; in_words_to_go >= 2; i+=4)
10784 if (in_words_to_go > 4)
10785 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10786 TRUE, srcbase, &srcoffset));
10788 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10789 src, FALSE, srcbase,
10792 if (out_words_to_go)
10794 if (out_words_to_go > 4)
10795 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10796 TRUE, dstbase, &dstoffset));
10797 else if (out_words_to_go != 1)
10798 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10799 out_words_to_go, dst,
10802 dstbase, &dstoffset));
10805 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10806 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10807 if (last_bytes != 0)
10809 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10815 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10816 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10819 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10820 if (out_words_to_go)
10824 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10825 sreg = copy_to_reg (mem);
10827 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10828 emit_move_insn (mem, sreg);
10831 gcc_assert (!in_words_to_go); /* Sanity check */
10834 if (in_words_to_go)
10836 gcc_assert (in_words_to_go > 0);
10838 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10839 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10842 gcc_assert (!last_bytes || part_bytes_reg);
10844 if (BYTES_BIG_ENDIAN && last_bytes)
10846 rtx tmp = gen_reg_rtx (SImode);
10848 /* The bytes we want are in the top end of the word. */
10849 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10850 GEN_INT (8 * (4 - last_bytes))));
10851 part_bytes_reg = tmp;
10855 mem = adjust_automodify_address (dstbase, QImode,
10856 plus_constant (dst, last_bytes - 1),
10857 dstoffset + last_bytes - 1);
10858 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10862 tmp = gen_reg_rtx (SImode);
10863 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10864 part_bytes_reg = tmp;
10871 if (last_bytes > 1)
10873 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10874 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10878 rtx tmp = gen_reg_rtx (SImode);
10879 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10880 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10881 part_bytes_reg = tmp;
10888 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10889 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10896 /* Select a dominance comparison mode if possible for a test of the general
10897 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10898 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10899 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10900 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10901 In all cases OP will be either EQ or NE, but we don't need to know which
10902 here. If we are unable to support a dominance comparison we return
10903 CC mode. This will then fail to match for the RTL expressions that
10904 generate this call. */
10906 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10908 enum rtx_code cond1, cond2;
10911 /* Currently we will probably get the wrong result if the individual
10912 comparisons are not simple. This also ensures that it is safe to
10913 reverse a comparison if necessary. */
10914 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10916 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10920 /* The if_then_else variant of this tests the second condition if the
10921 first passes, but is true if the first fails. Reverse the first
10922 condition to get a true "inclusive-or" expression. */
10923 if (cond_or == DOM_CC_NX_OR_Y)
10924 cond1 = reverse_condition (cond1);
10926 /* If the comparisons are not equal, and one doesn't dominate the other,
10927 then we can't do this. */
10929 && !comparison_dominates_p (cond1, cond2)
10930 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10935 enum rtx_code temp = cond1;
10943 if (cond_or == DOM_CC_X_AND_Y)
10948 case EQ: return CC_DEQmode;
10949 case LE: return CC_DLEmode;
10950 case LEU: return CC_DLEUmode;
10951 case GE: return CC_DGEmode;
10952 case GEU: return CC_DGEUmode;
10953 default: gcc_unreachable ();
10957 if (cond_or == DOM_CC_X_AND_Y)
10969 gcc_unreachable ();
10973 if (cond_or == DOM_CC_X_AND_Y)
10985 gcc_unreachable ();
10989 if (cond_or == DOM_CC_X_AND_Y)
10990 return CC_DLTUmode;
10995 return CC_DLTUmode;
10997 return CC_DLEUmode;
11001 gcc_unreachable ();
11005 if (cond_or == DOM_CC_X_AND_Y)
11006 return CC_DGTUmode;
11011 return CC_DGTUmode;
11013 return CC_DGEUmode;
11017 gcc_unreachable ();
11020 /* The remaining cases only occur when both comparisons are the
11023 gcc_assert (cond1 == cond2);
11027 gcc_assert (cond1 == cond2);
11031 gcc_assert (cond1 == cond2);
11035 gcc_assert (cond1 == cond2);
11036 return CC_DLEUmode;
11039 gcc_assert (cond1 == cond2);
11040 return CC_DGEUmode;
11043 gcc_unreachable ();
11048 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11050 /* All floating point compares return CCFP if it is an equality
11051 comparison, and CCFPE otherwise. */
11052 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11072 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11077 gcc_unreachable ();
11081 /* A compare with a shifted operand. Because of canonicalization, the
11082 comparison will have to be swapped when we emit the assembler. */
11083 if (GET_MODE (y) == SImode
11084 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11085 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11086 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11087 || GET_CODE (x) == ROTATERT))
11090 /* This operation is performed swapped, but since we only rely on the Z
11091 flag we don't need an additional mode. */
11092 if (GET_MODE (y) == SImode
11093 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11094 && GET_CODE (x) == NEG
11095 && (op == EQ || op == NE))
11098 /* This is a special case that is used by combine to allow a
11099 comparison of a shifted byte load to be split into a zero-extend
11100 followed by a comparison of the shifted integer (only valid for
11101 equalities and unsigned inequalities). */
11102 if (GET_MODE (x) == SImode
11103 && GET_CODE (x) == ASHIFT
11104 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11105 && GET_CODE (XEXP (x, 0)) == SUBREG
11106 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11107 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11108 && (op == EQ || op == NE
11109 || op == GEU || op == GTU || op == LTU || op == LEU)
11110 && GET_CODE (y) == CONST_INT)
11113 /* A construct for a conditional compare, if the false arm contains
11114 0, then both conditions must be true, otherwise either condition
11115 must be true. Not all conditions are possible, so CCmode is
11116 returned if it can't be done. */
11117 if (GET_CODE (x) == IF_THEN_ELSE
11118 && (XEXP (x, 2) == const0_rtx
11119 || XEXP (x, 2) == const1_rtx)
11120 && COMPARISON_P (XEXP (x, 0))
11121 && COMPARISON_P (XEXP (x, 1)))
11122 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11123 INTVAL (XEXP (x, 2)));
11125 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11126 if (GET_CODE (x) == AND
11127 && (op == EQ || op == NE)
11128 && COMPARISON_P (XEXP (x, 0))
11129 && COMPARISON_P (XEXP (x, 1)))
11130 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11133 if (GET_CODE (x) == IOR
11134 && (op == EQ || op == NE)
11135 && COMPARISON_P (XEXP (x, 0))
11136 && COMPARISON_P (XEXP (x, 1)))
11137 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11140 /* An operation (on Thumb) where we want to test for a single bit.
11141 This is done by shifting that bit up into the top bit of a
11142 scratch register; we can then branch on the sign bit. */
11144 && GET_MODE (x) == SImode
11145 && (op == EQ || op == NE)
11146 && GET_CODE (x) == ZERO_EXTRACT
11147 && XEXP (x, 1) == const1_rtx)
11150 /* An operation that sets the condition codes as a side-effect, the
11151 V flag is not set correctly, so we can only use comparisons where
11152 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11154 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11155 if (GET_MODE (x) == SImode
11157 && (op == EQ || op == NE || op == LT || op == GE)
11158 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11159 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11160 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11161 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11162 || GET_CODE (x) == LSHIFTRT
11163 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11164 || GET_CODE (x) == ROTATERT
11165 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11166 return CC_NOOVmode;
11168 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11171 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11172 && GET_CODE (x) == PLUS
11173 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11176 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11178 /* To keep things simple, always use the Cirrus cfcmp64 if it is
11180 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11187 /* A DImode comparison against zero can be implemented by
11188 or'ing the two halves together. */
11189 if (y == const0_rtx)
11192 /* We can do an equality test in three Thumb instructions. */
11202 /* DImode unsigned comparisons can be implemented by cmp +
11203 cmpeq without a scratch register. Not worth doing in
11214 /* DImode signed and unsigned comparisons can be implemented
11215 by cmp + sbcs with a scratch register, but that does not
11216 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11217 gcc_assert (op != EQ && op != NE);
11221 gcc_unreachable ();
11228 /* X and Y are two things to compare using CODE. Emit the compare insn and
11229 return the rtx for register 0 in the proper mode. FP means this is a
11230 floating point compare: I don't think that it is needed on the arm. */
11232 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
11234 enum machine_mode mode;
11236 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11238 /* We might have X as a constant, Y as a register because of the predicates
11239 used for cmpdi. If so, force X to a register here. */
11240 if (dimode_comparison && !REG_P (x))
11241 x = force_reg (DImode, x);
11243 mode = SELECT_CC_MODE (code, x, y);
11244 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11246 if (dimode_comparison
11247 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11248 && mode != CC_CZmode)
11252 /* To compare two non-zero values for equality, XOR them and
11253 then compare against zero. Not used for ARM mode; there
11254 CC_CZmode is cheaper. */
11255 if (mode == CC_Zmode && y != const0_rtx)
11257 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11260 /* A scratch register is required. */
11261 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
11262 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11263 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11266 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11271 /* Generate a sequence of insns that will generate the correct return
11272 address mask depending on the physical architecture that the program
11275 arm_gen_return_addr_mask (void)
11277 rtx reg = gen_reg_rtx (Pmode);
11279 emit_insn (gen_return_addr_mask (reg));
11284 arm_reload_in_hi (rtx *operands)
11286 rtx ref = operands[1];
11288 HOST_WIDE_INT offset = 0;
11290 if (GET_CODE (ref) == SUBREG)
11292 offset = SUBREG_BYTE (ref);
11293 ref = SUBREG_REG (ref);
11296 if (GET_CODE (ref) == REG)
11298 /* We have a pseudo which has been spilt onto the stack; there
11299 are two cases here: the first where there is a simple
11300 stack-slot replacement and a second where the stack-slot is
11301 out of range, or is used as a subreg. */
11302 if (reg_equiv_mem (REGNO (ref)))
11304 ref = reg_equiv_mem (REGNO (ref));
11305 base = find_replacement (&XEXP (ref, 0));
11308 /* The slot is out of range, or was dressed up in a SUBREG. */
11309 base = reg_equiv_address (REGNO (ref));
11312 base = find_replacement (&XEXP (ref, 0));
11314 /* Handle the case where the address is too complex to be offset by 1. */
11315 if (GET_CODE (base) == MINUS
11316 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11318 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11320 emit_set_insn (base_plus, base);
11323 else if (GET_CODE (base) == PLUS)
11325 /* The addend must be CONST_INT, or we would have dealt with it above. */
11326 HOST_WIDE_INT hi, lo;
11328 offset += INTVAL (XEXP (base, 1));
11329 base = XEXP (base, 0);
11331 /* Rework the address into a legal sequence of insns. */
11332 /* Valid range for lo is -4095 -> 4095 */
11335 : -((-offset) & 0xfff));
11337 /* Corner case, if lo is the max offset then we would be out of range
11338 once we have added the additional 1 below, so bump the msb into the
11339 pre-loading insn(s). */
11343 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11344 ^ (HOST_WIDE_INT) 0x80000000)
11345 - (HOST_WIDE_INT) 0x80000000);
11347 gcc_assert (hi + lo == offset);
11351 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11353 /* Get the base address; addsi3 knows how to handle constants
11354 that require more than one insn. */
11355 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11361 /* Operands[2] may overlap operands[0] (though it won't overlap
11362 operands[1]), that's why we asked for a DImode reg -- so we can
11363 use the bit that does not overlap. */
11364 if (REGNO (operands[2]) == REGNO (operands[0]))
11365 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11367 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11369 emit_insn (gen_zero_extendqisi2 (scratch,
11370 gen_rtx_MEM (QImode,
11371 plus_constant (base,
11373 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11374 gen_rtx_MEM (QImode,
11375 plus_constant (base,
11377 if (!BYTES_BIG_ENDIAN)
11378 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11379 gen_rtx_IOR (SImode,
11382 gen_rtx_SUBREG (SImode, operands[0], 0),
11386 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11387 gen_rtx_IOR (SImode,
11388 gen_rtx_ASHIFT (SImode, scratch,
11390 gen_rtx_SUBREG (SImode, operands[0], 0)));
11393 /* Handle storing a half-word to memory during reload by synthesizing as two
11394 byte stores. Take care not to clobber the input values until after we
11395 have moved them somewhere safe. This code assumes that if the DImode
11396 scratch in operands[2] overlaps either the input value or output address
11397 in some way, then that value must die in this insn (we absolutely need
11398 two scratch registers for some corner cases). */
11400 arm_reload_out_hi (rtx *operands)
11402 rtx ref = operands[0];
11403 rtx outval = operands[1];
11405 HOST_WIDE_INT offset = 0;
11407 if (GET_CODE (ref) == SUBREG)
11409 offset = SUBREG_BYTE (ref);
11410 ref = SUBREG_REG (ref);
11413 if (GET_CODE (ref) == REG)
11415 /* We have a pseudo which has been spilt onto the stack; there
11416 are two cases here: the first where there is a simple
11417 stack-slot replacement and a second where the stack-slot is
11418 out of range, or is used as a subreg. */
11419 if (reg_equiv_mem (REGNO (ref)))
11421 ref = reg_equiv_mem (REGNO (ref));
11422 base = find_replacement (&XEXP (ref, 0));
11425 /* The slot is out of range, or was dressed up in a SUBREG. */
11426 base = reg_equiv_address (REGNO (ref));
11429 base = find_replacement (&XEXP (ref, 0));
11431 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11433 /* Handle the case where the address is too complex to be offset by 1. */
11434 if (GET_CODE (base) == MINUS
11435 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11437 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11439 /* Be careful not to destroy OUTVAL. */
11440 if (reg_overlap_mentioned_p (base_plus, outval))
11442 /* Updating base_plus might destroy outval, see if we can
11443 swap the scratch and base_plus. */
11444 if (!reg_overlap_mentioned_p (scratch, outval))
11447 scratch = base_plus;
11452 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11454 /* Be conservative and copy OUTVAL into the scratch now,
11455 this should only be necessary if outval is a subreg
11456 of something larger than a word. */
11457 /* XXX Might this clobber base? I can't see how it can,
11458 since scratch is known to overlap with OUTVAL, and
11459 must be wider than a word. */
11460 emit_insn (gen_movhi (scratch_hi, outval));
11461 outval = scratch_hi;
11465 emit_set_insn (base_plus, base);
11468 else if (GET_CODE (base) == PLUS)
11470 /* The addend must be CONST_INT, or we would have dealt with it above. */
11471 HOST_WIDE_INT hi, lo;
11473 offset += INTVAL (XEXP (base, 1));
11474 base = XEXP (base, 0);
11476 /* Rework the address into a legal sequence of insns. */
11477 /* Valid range for lo is -4095 -> 4095 */
11480 : -((-offset) & 0xfff));
11482 /* Corner case, if lo is the max offset then we would be out of range
11483 once we have added the additional 1 below, so bump the msb into the
11484 pre-loading insn(s). */
11488 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11489 ^ (HOST_WIDE_INT) 0x80000000)
11490 - (HOST_WIDE_INT) 0x80000000);
11492 gcc_assert (hi + lo == offset);
11496 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11498 /* Be careful not to destroy OUTVAL. */
11499 if (reg_overlap_mentioned_p (base_plus, outval))
11501 /* Updating base_plus might destroy outval, see if we
11502 can swap the scratch and base_plus. */
11503 if (!reg_overlap_mentioned_p (scratch, outval))
11506 scratch = base_plus;
11511 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11513 /* Be conservative and copy outval into scratch now,
11514 this should only be necessary if outval is a
11515 subreg of something larger than a word. */
11516 /* XXX Might this clobber base? I can't see how it
11517 can, since scratch is known to overlap with
11519 emit_insn (gen_movhi (scratch_hi, outval));
11520 outval = scratch_hi;
11524 /* Get the base address; addsi3 knows how to handle constants
11525 that require more than one insn. */
11526 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11532 if (BYTES_BIG_ENDIAN)
11534 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11535 plus_constant (base, offset + 1)),
11536 gen_lowpart (QImode, outval)));
11537 emit_insn (gen_lshrsi3 (scratch,
11538 gen_rtx_SUBREG (SImode, outval, 0),
11540 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11541 gen_lowpart (QImode, scratch)));
11545 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
11546 gen_lowpart (QImode, outval)));
11547 emit_insn (gen_lshrsi3 (scratch,
11548 gen_rtx_SUBREG (SImode, outval, 0),
11550 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
11551 plus_constant (base, offset + 1)),
11552 gen_lowpart (QImode, scratch)));
11556 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11557 (padded to the size of a word) should be passed in a register. */
11560 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
11562 if (TARGET_AAPCS_BASED)
11563 return must_pass_in_stack_var_size (mode, type);
11565 return must_pass_in_stack_var_size_or_pad (mode, type);
11569 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11570 Return true if an argument passed on the stack should be padded upwards,
11571 i.e. if the least-significant byte has useful data.
11572 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11573 aggregate types are placed in the lowest memory address. */
11576 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
11578 if (!TARGET_AAPCS_BASED)
11579 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11581 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11588 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11589 Return !BYTES_BIG_ENDIAN if the least significant byte of the
11590 register has useful data, and return the opposite if the most
11591 significant byte does. */
11594 arm_pad_reg_upward (enum machine_mode mode,
11595 tree type, int first ATTRIBUTE_UNUSED)
11597 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
11599 /* For AAPCS, small aggregates, small fixed-point types,
11600 and small complex types are always padded upwards. */
11603 if ((AGGREGATE_TYPE_P (type)
11604 || TREE_CODE (type) == COMPLEX_TYPE
11605 || FIXED_POINT_TYPE_P (type))
11606 && int_size_in_bytes (type) <= 4)
11611 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
11612 && GET_MODE_SIZE (mode) <= 4)
11617 /* Otherwise, use default padding. */
11618 return !BYTES_BIG_ENDIAN;
11622 /* Print a symbolic form of X to the debug file, F. */
11624 arm_print_value (FILE *f, rtx x)
11626 switch (GET_CODE (x))
11629 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11633 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11641 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11643 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11644 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11652 fprintf (f, "\"%s\"", XSTR (x, 0));
11656 fprintf (f, "`%s'", XSTR (x, 0));
11660 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11664 arm_print_value (f, XEXP (x, 0));
11668 arm_print_value (f, XEXP (x, 0));
11670 arm_print_value (f, XEXP (x, 1));
11678 fprintf (f, "????");
11683 /* Routines for manipulation of the constant pool. */
11685 /* Arm instructions cannot load a large constant directly into a
11686 register; they have to come from a pc relative load. The constant
11687 must therefore be placed in the addressable range of the pc
11688 relative load. Depending on the precise pc relative load
11689 instruction the range is somewhere between 256 bytes and 4k. This
11690 means that we often have to dump a constant inside a function, and
11691 generate code to branch around it.
11693 It is important to minimize this, since the branches will slow
11694 things down and make the code larger.
11696 Normally we can hide the table after an existing unconditional
11697 branch so that there is no interruption of the flow, but in the
11698 worst case the code looks like this:
11716 We fix this by performing a scan after scheduling, which notices
11717 which instructions need to have their operands fetched from the
11718 constant table and builds the table.
11720 The algorithm starts by building a table of all the constants that
11721 need fixing up and all the natural barriers in the function (places
11722 where a constant table can be dropped without breaking the flow).
11723 For each fixup we note how far the pc-relative replacement will be
11724 able to reach and the offset of the instruction into the function.
11726 Having built the table we then group the fixes together to form
11727 tables that are as large as possible (subject to addressing
11728 constraints) and emit each table of constants after the last
11729 barrier that is within range of all the instructions in the group.
11730 If a group does not contain a barrier, then we forcibly create one
11731 by inserting a jump instruction into the flow. Once the table has
11732 been inserted, the insns are then modified to reference the
11733 relevant entry in the pool.
11735 Possible enhancements to the algorithm (not implemented) are:
11737 1) For some processors and object formats, there may be benefit in
11738 aligning the pools to the start of cache lines; this alignment
11739 would need to be taken into account when calculating addressability
11742 /* These typedefs are located at the start of this file, so that
11743 they can be used in the prototypes there. This comment is to
11744 remind readers of that fact so that the following structures
11745 can be understood more easily.
11747 typedef struct minipool_node Mnode;
11748 typedef struct minipool_fixup Mfix; */
11750 struct minipool_node
11752 /* Doubly linked chain of entries. */
11755 /* The maximum offset into the code that this entry can be placed. While
11756 pushing fixes for forward references, all entries are sorted in order
11757 of increasing max_address. */
11758 HOST_WIDE_INT max_address;
11759 /* Similarly for an entry inserted for a backwards ref. */
11760 HOST_WIDE_INT min_address;
11761 /* The number of fixes referencing this entry. This can become zero
11762 if we "unpush" an entry. In this case we ignore the entry when we
11763 come to emit the code. */
11765 /* The offset from the start of the minipool. */
11766 HOST_WIDE_INT offset;
11767 /* The value in table. */
11769 /* The mode of value. */
11770 enum machine_mode mode;
11771 /* The size of the value. With iWMMXt enabled
11772 sizes > 4 also imply an alignment of 8-bytes. */
11776 struct minipool_fixup
11780 HOST_WIDE_INT address;
11782 enum machine_mode mode;
11786 HOST_WIDE_INT forwards;
11787 HOST_WIDE_INT backwards;
11790 /* Fixes less than a word need padding out to a word boundary. */
11791 #define MINIPOOL_FIX_SIZE(mode) \
11792 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11794 static Mnode * minipool_vector_head;
11795 static Mnode * minipool_vector_tail;
11796 static rtx minipool_vector_label;
11797 static int minipool_pad;
11799 /* The linked list of all minipool fixes required for this function. */
11800 Mfix * minipool_fix_head;
11801 Mfix * minipool_fix_tail;
11802 /* The fix entry for the current minipool, once it has been placed. */
11803 Mfix * minipool_barrier;
11805 /* Determines if INSN is the start of a jump table. Returns the end
11806 of the TABLE or NULL_RTX. */
11808 is_jump_table (rtx insn)
11812 if (jump_to_label_p (insn)
11813 && ((table = next_real_insn (JUMP_LABEL (insn)))
11814 == next_real_insn (insn))
11816 && GET_CODE (table) == JUMP_INSN
11817 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11818 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11824 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11825 #define JUMP_TABLES_IN_TEXT_SECTION 0
11828 static HOST_WIDE_INT
11829 get_jump_table_size (rtx insn)
11831 /* ADDR_VECs only take room if read-only data does into the text
11833 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11835 rtx body = PATTERN (insn);
11836 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11837 HOST_WIDE_INT size;
11838 HOST_WIDE_INT modesize;
11840 modesize = GET_MODE_SIZE (GET_MODE (body));
11841 size = modesize * XVECLEN (body, elt);
11845 /* Round up size of TBB table to a halfword boundary. */
11846 size = (size + 1) & ~(HOST_WIDE_INT)1;
11849 /* No padding necessary for TBH. */
11852 /* Add two bytes for alignment on Thumb. */
11857 gcc_unreachable ();
11865 /* Return the maximum amount of padding that will be inserted before
11868 static HOST_WIDE_INT
11869 get_label_padding (rtx label)
11871 HOST_WIDE_INT align, min_insn_size;
11873 align = 1 << label_to_alignment (label);
11874 min_insn_size = TARGET_THUMB ? 2 : 4;
11875 return align > min_insn_size ? align - min_insn_size : 0;
11878 /* Move a minipool fix MP from its current location to before MAX_MP.
11879 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11880 constraints may need updating. */
11882 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11883 HOST_WIDE_INT max_address)
11885 /* The code below assumes these are different. */
11886 gcc_assert (mp != max_mp);
11888 if (max_mp == NULL)
11890 if (max_address < mp->max_address)
11891 mp->max_address = max_address;
11895 if (max_address > max_mp->max_address - mp->fix_size)
11896 mp->max_address = max_mp->max_address - mp->fix_size;
11898 mp->max_address = max_address;
11900 /* Unlink MP from its current position. Since max_mp is non-null,
11901 mp->prev must be non-null. */
11902 mp->prev->next = mp->next;
11903 if (mp->next != NULL)
11904 mp->next->prev = mp->prev;
11906 minipool_vector_tail = mp->prev;
11908 /* Re-insert it before MAX_MP. */
11910 mp->prev = max_mp->prev;
11913 if (mp->prev != NULL)
11914 mp->prev->next = mp;
11916 minipool_vector_head = mp;
11919 /* Save the new entry. */
11922 /* Scan over the preceding entries and adjust their addresses as
11924 while (mp->prev != NULL
11925 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11927 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11934 /* Add a constant to the minipool for a forward reference. Returns the
11935 node added or NULL if the constant will not fit in this pool. */
11937 add_minipool_forward_ref (Mfix *fix)
11939 /* If set, max_mp is the first pool_entry that has a lower
11940 constraint than the one we are trying to add. */
11941 Mnode * max_mp = NULL;
11942 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11945 /* If the minipool starts before the end of FIX->INSN then this FIX
11946 can not be placed into the current pool. Furthermore, adding the
11947 new constant pool entry may cause the pool to start FIX_SIZE bytes
11949 if (minipool_vector_head &&
11950 (fix->address + get_attr_length (fix->insn)
11951 >= minipool_vector_head->max_address - fix->fix_size))
11954 /* Scan the pool to see if a constant with the same value has
11955 already been added. While we are doing this, also note the
11956 location where we must insert the constant if it doesn't already
11958 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11960 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11961 && fix->mode == mp->mode
11962 && (GET_CODE (fix->value) != CODE_LABEL
11963 || (CODE_LABEL_NUMBER (fix->value)
11964 == CODE_LABEL_NUMBER (mp->value)))
11965 && rtx_equal_p (fix->value, mp->value))
11967 /* More than one fix references this entry. */
11969 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11972 /* Note the insertion point if necessary. */
11974 && mp->max_address > max_address)
11977 /* If we are inserting an 8-bytes aligned quantity and
11978 we have not already found an insertion point, then
11979 make sure that all such 8-byte aligned quantities are
11980 placed at the start of the pool. */
11981 if (ARM_DOUBLEWORD_ALIGN
11983 && fix->fix_size >= 8
11984 && mp->fix_size < 8)
11987 max_address = mp->max_address;
11991 /* The value is not currently in the minipool, so we need to create
11992 a new entry for it. If MAX_MP is NULL, the entry will be put on
11993 the end of the list since the placement is less constrained than
11994 any existing entry. Otherwise, we insert the new fix before
11995 MAX_MP and, if necessary, adjust the constraints on the other
11998 mp->fix_size = fix->fix_size;
11999 mp->mode = fix->mode;
12000 mp->value = fix->value;
12002 /* Not yet required for a backwards ref. */
12003 mp->min_address = -65536;
12005 if (max_mp == NULL)
12007 mp->max_address = max_address;
12009 mp->prev = minipool_vector_tail;
12011 if (mp->prev == NULL)
12013 minipool_vector_head = mp;
12014 minipool_vector_label = gen_label_rtx ();
12017 mp->prev->next = mp;
12019 minipool_vector_tail = mp;
12023 if (max_address > max_mp->max_address - mp->fix_size)
12024 mp->max_address = max_mp->max_address - mp->fix_size;
12026 mp->max_address = max_address;
12029 mp->prev = max_mp->prev;
12031 if (mp->prev != NULL)
12032 mp->prev->next = mp;
12034 minipool_vector_head = mp;
12037 /* Save the new entry. */
12040 /* Scan over the preceding entries and adjust their addresses as
12042 while (mp->prev != NULL
12043 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12045 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12053 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12054 HOST_WIDE_INT min_address)
12056 HOST_WIDE_INT offset;
12058 /* The code below assumes these are different. */
12059 gcc_assert (mp != min_mp);
12061 if (min_mp == NULL)
12063 if (min_address > mp->min_address)
12064 mp->min_address = min_address;
12068 /* We will adjust this below if it is too loose. */
12069 mp->min_address = min_address;
12071 /* Unlink MP from its current position. Since min_mp is non-null,
12072 mp->next must be non-null. */
12073 mp->next->prev = mp->prev;
12074 if (mp->prev != NULL)
12075 mp->prev->next = mp->next;
12077 minipool_vector_head = mp->next;
12079 /* Reinsert it after MIN_MP. */
12081 mp->next = min_mp->next;
12083 if (mp->next != NULL)
12084 mp->next->prev = mp;
12086 minipool_vector_tail = mp;
12092 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12094 mp->offset = offset;
12095 if (mp->refcount > 0)
12096 offset += mp->fix_size;
12098 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12099 mp->next->min_address = mp->min_address + mp->fix_size;
12105 /* Add a constant to the minipool for a backward reference. Returns the
12106 node added or NULL if the constant will not fit in this pool.
12108 Note that the code for insertion for a backwards reference can be
12109 somewhat confusing because the calculated offsets for each fix do
12110 not take into account the size of the pool (which is still under
12113 add_minipool_backward_ref (Mfix *fix)
12115 /* If set, min_mp is the last pool_entry that has a lower constraint
12116 than the one we are trying to add. */
12117 Mnode *min_mp = NULL;
12118 /* This can be negative, since it is only a constraint. */
12119 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12122 /* If we can't reach the current pool from this insn, or if we can't
12123 insert this entry at the end of the pool without pushing other
12124 fixes out of range, then we don't try. This ensures that we
12125 can't fail later on. */
12126 if (min_address >= minipool_barrier->address
12127 || (minipool_vector_tail->min_address + fix->fix_size
12128 >= minipool_barrier->address))
12131 /* Scan the pool to see if a constant with the same value has
12132 already been added. While we are doing this, also note the
12133 location where we must insert the constant if it doesn't already
12135 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12137 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12138 && fix->mode == mp->mode
12139 && (GET_CODE (fix->value) != CODE_LABEL
12140 || (CODE_LABEL_NUMBER (fix->value)
12141 == CODE_LABEL_NUMBER (mp->value)))
12142 && rtx_equal_p (fix->value, mp->value)
12143 /* Check that there is enough slack to move this entry to the
12144 end of the table (this is conservative). */
12145 && (mp->max_address
12146 > (minipool_barrier->address
12147 + minipool_vector_tail->offset
12148 + minipool_vector_tail->fix_size)))
12151 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12154 if (min_mp != NULL)
12155 mp->min_address += fix->fix_size;
12158 /* Note the insertion point if necessary. */
12159 if (mp->min_address < min_address)
12161 /* For now, we do not allow the insertion of 8-byte alignment
12162 requiring nodes anywhere but at the start of the pool. */
12163 if (ARM_DOUBLEWORD_ALIGN
12164 && fix->fix_size >= 8 && mp->fix_size < 8)
12169 else if (mp->max_address
12170 < minipool_barrier->address + mp->offset + fix->fix_size)
12172 /* Inserting before this entry would push the fix beyond
12173 its maximum address (which can happen if we have
12174 re-located a forwards fix); force the new fix to come
12176 if (ARM_DOUBLEWORD_ALIGN
12177 && fix->fix_size >= 8 && mp->fix_size < 8)
12182 min_address = mp->min_address + fix->fix_size;
12185 /* Do not insert a non-8-byte aligned quantity before 8-byte
12186 aligned quantities. */
12187 else if (ARM_DOUBLEWORD_ALIGN
12188 && fix->fix_size < 8
12189 && mp->fix_size >= 8)
12192 min_address = mp->min_address + fix->fix_size;
12197 /* We need to create a new entry. */
12199 mp->fix_size = fix->fix_size;
12200 mp->mode = fix->mode;
12201 mp->value = fix->value;
12203 mp->max_address = minipool_barrier->address + 65536;
12205 mp->min_address = min_address;
12207 if (min_mp == NULL)
12210 mp->next = minipool_vector_head;
12212 if (mp->next == NULL)
12214 minipool_vector_tail = mp;
12215 minipool_vector_label = gen_label_rtx ();
12218 mp->next->prev = mp;
12220 minipool_vector_head = mp;
12224 mp->next = min_mp->next;
12228 if (mp->next != NULL)
12229 mp->next->prev = mp;
12231 minipool_vector_tail = mp;
12234 /* Save the new entry. */
12242 /* Scan over the following entries and adjust their offsets. */
12243 while (mp->next != NULL)
12245 if (mp->next->min_address < mp->min_address + mp->fix_size)
12246 mp->next->min_address = mp->min_address + mp->fix_size;
12249 mp->next->offset = mp->offset + mp->fix_size;
12251 mp->next->offset = mp->offset;
12260 assign_minipool_offsets (Mfix *barrier)
12262 HOST_WIDE_INT offset = 0;
12265 minipool_barrier = barrier;
12267 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12269 mp->offset = offset;
12271 if (mp->refcount > 0)
12272 offset += mp->fix_size;
12276 /* Output the literal table */
12278 dump_minipool (rtx scan)
12284 if (ARM_DOUBLEWORD_ALIGN)
12285 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12286 if (mp->refcount > 0 && mp->fix_size >= 8)
12293 fprintf (dump_file,
12294 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12295 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12297 scan = emit_label_after (gen_label_rtx (), scan);
12298 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12299 scan = emit_label_after (minipool_vector_label, scan);
12301 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12303 if (mp->refcount > 0)
12307 fprintf (dump_file,
12308 ";; Offset %u, min %ld, max %ld ",
12309 (unsigned) mp->offset, (unsigned long) mp->min_address,
12310 (unsigned long) mp->max_address);
12311 arm_print_value (dump_file, mp->value);
12312 fputc ('\n', dump_file);
12315 switch (mp->fix_size)
12317 #ifdef HAVE_consttable_1
12319 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12323 #ifdef HAVE_consttable_2
12325 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12329 #ifdef HAVE_consttable_4
12331 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12335 #ifdef HAVE_consttable_8
12337 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12341 #ifdef HAVE_consttable_16
12343 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12348 gcc_unreachable ();
12356 minipool_vector_head = minipool_vector_tail = NULL;
12357 scan = emit_insn_after (gen_consttable_end (), scan);
12358 scan = emit_barrier_after (scan);
12361 /* Return the cost of forcibly inserting a barrier after INSN. */
12363 arm_barrier_cost (rtx insn)
12365 /* Basing the location of the pool on the loop depth is preferable,
12366 but at the moment, the basic block information seems to be
12367 corrupt by this stage of the compilation. */
12368 int base_cost = 50;
12369 rtx next = next_nonnote_insn (insn);
12371 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12374 switch (GET_CODE (insn))
12377 /* It will always be better to place the table before the label, rather
12386 return base_cost - 10;
12389 return base_cost + 10;
12393 /* Find the best place in the insn stream in the range
12394 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12395 Create the barrier by inserting a jump and add a new fix entry for
12398 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12400 HOST_WIDE_INT count = 0;
12402 rtx from = fix->insn;
12403 /* The instruction after which we will insert the jump. */
12404 rtx selected = NULL;
12406 /* The address at which the jump instruction will be placed. */
12407 HOST_WIDE_INT selected_address;
12409 HOST_WIDE_INT max_count = max_address - fix->address;
12410 rtx label = gen_label_rtx ();
12412 selected_cost = arm_barrier_cost (from);
12413 selected_address = fix->address;
12415 while (from && count < max_count)
12420 /* This code shouldn't have been called if there was a natural barrier
12422 gcc_assert (GET_CODE (from) != BARRIER);
12424 /* Count the length of this insn. This must stay in sync with the
12425 code that pushes minipool fixes. */
12426 if (LABEL_P (from))
12427 count += get_label_padding (from);
12429 count += get_attr_length (from);
12431 /* If there is a jump table, add its length. */
12432 tmp = is_jump_table (from);
12435 count += get_jump_table_size (tmp);
12437 /* Jump tables aren't in a basic block, so base the cost on
12438 the dispatch insn. If we select this location, we will
12439 still put the pool after the table. */
12440 new_cost = arm_barrier_cost (from);
12442 if (count < max_count
12443 && (!selected || new_cost <= selected_cost))
12446 selected_cost = new_cost;
12447 selected_address = fix->address + count;
12450 /* Continue after the dispatch table. */
12451 from = NEXT_INSN (tmp);
12455 new_cost = arm_barrier_cost (from);
12457 if (count < max_count
12458 && (!selected || new_cost <= selected_cost))
12461 selected_cost = new_cost;
12462 selected_address = fix->address + count;
12465 from = NEXT_INSN (from);
12468 /* Make sure that we found a place to insert the jump. */
12469 gcc_assert (selected);
12471 /* Make sure we do not split a call and its corresponding
12472 CALL_ARG_LOCATION note. */
12473 if (CALL_P (selected))
12475 rtx next = NEXT_INSN (selected);
12476 if (next && NOTE_P (next)
12477 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
12481 /* Create a new JUMP_INSN that branches around a barrier. */
12482 from = emit_jump_insn_after (gen_jump (label), selected);
12483 JUMP_LABEL (from) = label;
12484 barrier = emit_barrier_after (from);
12485 emit_label_after (label, barrier);
12487 /* Create a minipool barrier entry for the new barrier. */
12488 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
12489 new_fix->insn = barrier;
12490 new_fix->address = selected_address;
12491 new_fix->next = fix->next;
12492 fix->next = new_fix;
12497 /* Record that there is a natural barrier in the insn stream at
12500 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
12502 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12505 fix->address = address;
12508 if (minipool_fix_head != NULL)
12509 minipool_fix_tail->next = fix;
12511 minipool_fix_head = fix;
12513 minipool_fix_tail = fix;
12516 /* Record INSN, which will need fixing up to load a value from the
12517 minipool. ADDRESS is the offset of the insn since the start of the
12518 function; LOC is a pointer to the part of the insn which requires
12519 fixing; VALUE is the constant that must be loaded, which is of type
12522 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
12523 enum machine_mode mode, rtx value)
12525 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
12528 fix->address = address;
12531 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
12532 fix->value = value;
12533 fix->forwards = get_attr_pool_range (insn);
12534 fix->backwards = get_attr_neg_pool_range (insn);
12535 fix->minipool = NULL;
12537 /* If an insn doesn't have a range defined for it, then it isn't
12538 expecting to be reworked by this code. Better to stop now than
12539 to generate duff assembly code. */
12540 gcc_assert (fix->forwards || fix->backwards);
12542 /* If an entry requires 8-byte alignment then assume all constant pools
12543 require 4 bytes of padding. Trying to do this later on a per-pool
12544 basis is awkward because existing pool entries have to be modified. */
12545 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
12550 fprintf (dump_file,
12551 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12552 GET_MODE_NAME (mode),
12553 INSN_UID (insn), (unsigned long) address,
12554 -1 * (long)fix->backwards, (long)fix->forwards);
12555 arm_print_value (dump_file, fix->value);
12556 fprintf (dump_file, "\n");
12559 /* Add it to the chain of fixes. */
12562 if (minipool_fix_head != NULL)
12563 minipool_fix_tail->next = fix;
12565 minipool_fix_head = fix;
12567 minipool_fix_tail = fix;
12570 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12571 Returns the number of insns needed, or 99 if we don't know how to
12574 arm_const_double_inline_cost (rtx val)
12576 rtx lowpart, highpart;
12577 enum machine_mode mode;
12579 mode = GET_MODE (val);
12581 if (mode == VOIDmode)
12584 gcc_assert (GET_MODE_SIZE (mode) == 8);
12586 lowpart = gen_lowpart (SImode, val);
12587 highpart = gen_highpart_mode (SImode, mode, val);
12589 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12590 gcc_assert (GET_CODE (highpart) == CONST_INT);
12592 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12593 NULL_RTX, NULL_RTX, 0, 0)
12594 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12595 NULL_RTX, NULL_RTX, 0, 0));
12598 /* Return true if it is worthwhile to split a 64-bit constant into two
12599 32-bit operations. This is the case if optimizing for size, or
12600 if we have load delay slots, or if one 32-bit part can be done with
12601 a single data operation. */
12603 arm_const_double_by_parts (rtx val)
12605 enum machine_mode mode = GET_MODE (val);
12608 if (optimize_size || arm_ld_sched)
12611 if (mode == VOIDmode)
12614 part = gen_highpart_mode (SImode, mode, val);
12616 gcc_assert (GET_CODE (part) == CONST_INT);
12618 if (const_ok_for_arm (INTVAL (part))
12619 || const_ok_for_arm (~INTVAL (part)))
12622 part = gen_lowpart (SImode, val);
12624 gcc_assert (GET_CODE (part) == CONST_INT);
12626 if (const_ok_for_arm (INTVAL (part))
12627 || const_ok_for_arm (~INTVAL (part)))
12633 /* Return true if it is possible to inline both the high and low parts
12634 of a 64-bit constant into 32-bit data processing instructions. */
12636 arm_const_double_by_immediates (rtx val)
12638 enum machine_mode mode = GET_MODE (val);
12641 if (mode == VOIDmode)
12644 part = gen_highpart_mode (SImode, mode, val);
12646 gcc_assert (GET_CODE (part) == CONST_INT);
12648 if (!const_ok_for_arm (INTVAL (part)))
12651 part = gen_lowpart (SImode, val);
12653 gcc_assert (GET_CODE (part) == CONST_INT);
12655 if (!const_ok_for_arm (INTVAL (part)))
12661 /* Scan INSN and note any of its operands that need fixing.
12662 If DO_PUSHES is false we do not actually push any of the fixups
12663 needed. The function returns TRUE if any fixups were needed/pushed.
12664 This is used by arm_memory_load_p() which needs to know about loads
12665 of constants that will be converted into minipool loads. */
12667 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12669 bool result = false;
12672 extract_insn (insn);
12674 if (!constrain_operands (1))
12675 fatal_insn_not_found (insn);
12677 if (recog_data.n_alternatives == 0)
12680 /* Fill in recog_op_alt with information about the constraints of
12682 preprocess_constraints ();
12684 for (opno = 0; opno < recog_data.n_operands; opno++)
12686 /* Things we need to fix can only occur in inputs. */
12687 if (recog_data.operand_type[opno] != OP_IN)
12690 /* If this alternative is a memory reference, then any mention
12691 of constants in this alternative is really to fool reload
12692 into allowing us to accept one there. We need to fix them up
12693 now so that we output the right code. */
12694 if (recog_op_alt[opno][which_alternative].memory_ok)
12696 rtx op = recog_data.operand[opno];
12698 if (CONSTANT_P (op))
12701 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12702 recog_data.operand_mode[opno], op);
12705 else if (GET_CODE (op) == MEM
12706 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12707 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12711 rtx cop = avoid_constant_pool_reference (op);
12713 /* Casting the address of something to a mode narrower
12714 than a word can cause avoid_constant_pool_reference()
12715 to return the pool reference itself. That's no good to
12716 us here. Lets just hope that we can use the
12717 constant pool value directly. */
12719 cop = get_pool_constant (XEXP (op, 0));
12721 push_minipool_fix (insn, address,
12722 recog_data.operand_loc[opno],
12723 recog_data.operand_mode[opno], cop);
12734 /* Convert instructions to their cc-clobbering variant if possible, since
12735 that allows us to use smaller encodings. */
12738 thumb2_reorg (void)
12743 INIT_REG_SET (&live);
12745 /* We are freeing block_for_insn in the toplev to keep compatibility
12746 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12747 compute_bb_for_insn ();
12754 COPY_REG_SET (&live, DF_LR_OUT (bb));
12755 df_simulate_initialize_backwards (bb, &live);
12756 FOR_BB_INSNS_REVERSE (bb, insn)
12758 if (NONJUMP_INSN_P (insn)
12759 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12761 rtx pat = PATTERN (insn);
12762 if (GET_CODE (pat) == SET
12763 && low_register_operand (XEXP (pat, 0), SImode)
12764 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12765 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12766 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12768 rtx dst = XEXP (pat, 0);
12769 rtx src = XEXP (pat, 1);
12770 rtx op0 = XEXP (src, 0);
12771 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12772 ? XEXP (src, 1) : NULL);
12774 if (rtx_equal_p (dst, op0)
12775 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12777 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12778 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12779 rtvec vec = gen_rtvec (2, pat, clobber);
12781 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12782 INSN_CODE (insn) = -1;
12784 /* We can also handle a commutative operation where the
12785 second operand matches the destination. */
12786 else if (op1 && rtx_equal_p (dst, op1))
12788 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12789 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12792 src = copy_rtx (src);
12793 XEXP (src, 0) = op1;
12794 XEXP (src, 1) = op0;
12795 pat = gen_rtx_SET (VOIDmode, dst, src);
12796 vec = gen_rtvec (2, pat, clobber);
12797 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12798 INSN_CODE (insn) = -1;
12803 if (NONDEBUG_INSN_P (insn))
12804 df_simulate_one_insn_backwards (bb, insn, &live);
12808 CLEAR_REG_SET (&live);
12811 /* Gcc puts the pool in the wrong place for ARM, since we can only
12812 load addresses a limited distance around the pc. We do some
12813 special munging to move the constant pool values to the correct
12814 point in the code. */
12819 HOST_WIDE_INT address = 0;
12825 minipool_fix_head = minipool_fix_tail = NULL;
12827 /* The first insn must always be a note, or the code below won't
12828 scan it properly. */
12829 insn = get_insns ();
12830 gcc_assert (GET_CODE (insn) == NOTE);
12833 /* Scan all the insns and record the operands that will need fixing. */
12834 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12836 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12837 && (arm_cirrus_insn_p (insn)
12838 || GET_CODE (insn) == JUMP_INSN
12839 || arm_memory_load_p (insn)))
12840 cirrus_reorg (insn);
12842 if (GET_CODE (insn) == BARRIER)
12843 push_minipool_barrier (insn, address);
12844 else if (INSN_P (insn))
12848 note_invalid_constants (insn, address, true);
12849 address += get_attr_length (insn);
12851 /* If the insn is a vector jump, add the size of the table
12852 and skip the table. */
12853 if ((table = is_jump_table (insn)) != NULL)
12855 address += get_jump_table_size (table);
12859 else if (LABEL_P (insn))
12860 /* Add the worst-case padding due to alignment. We don't add
12861 the _current_ padding because the minipool insertions
12862 themselves might change it. */
12863 address += get_label_padding (insn);
12866 fix = minipool_fix_head;
12868 /* Now scan the fixups and perform the required changes. */
12873 Mfix * last_added_fix;
12874 Mfix * last_barrier = NULL;
12877 /* Skip any further barriers before the next fix. */
12878 while (fix && GET_CODE (fix->insn) == BARRIER)
12881 /* No more fixes. */
12885 last_added_fix = NULL;
12887 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12889 if (GET_CODE (ftmp->insn) == BARRIER)
12891 if (ftmp->address >= minipool_vector_head->max_address)
12894 last_barrier = ftmp;
12896 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12899 last_added_fix = ftmp; /* Keep track of the last fix added. */
12902 /* If we found a barrier, drop back to that; any fixes that we
12903 could have reached but come after the barrier will now go in
12904 the next mini-pool. */
12905 if (last_barrier != NULL)
12907 /* Reduce the refcount for those fixes that won't go into this
12909 for (fdel = last_barrier->next;
12910 fdel && fdel != ftmp;
12913 fdel->minipool->refcount--;
12914 fdel->minipool = NULL;
12917 ftmp = last_barrier;
12921 /* ftmp is first fix that we can't fit into this pool and
12922 there no natural barriers that we could use. Insert a
12923 new barrier in the code somewhere between the previous
12924 fix and this one, and arrange to jump around it. */
12925 HOST_WIDE_INT max_address;
12927 /* The last item on the list of fixes must be a barrier, so
12928 we can never run off the end of the list of fixes without
12929 last_barrier being set. */
12932 max_address = minipool_vector_head->max_address;
12933 /* Check that there isn't another fix that is in range that
12934 we couldn't fit into this pool because the pool was
12935 already too large: we need to put the pool before such an
12936 instruction. The pool itself may come just after the
12937 fix because create_fix_barrier also allows space for a
12938 jump instruction. */
12939 if (ftmp->address < max_address)
12940 max_address = ftmp->address + 1;
12942 last_barrier = create_fix_barrier (last_added_fix, max_address);
12945 assign_minipool_offsets (last_barrier);
12949 if (GET_CODE (ftmp->insn) != BARRIER
12950 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12957 /* Scan over the fixes we have identified for this pool, fixing them
12958 up and adding the constants to the pool itself. */
12959 for (this_fix = fix; this_fix && ftmp != this_fix;
12960 this_fix = this_fix->next)
12961 if (GET_CODE (this_fix->insn) != BARRIER)
12964 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12965 minipool_vector_label),
12966 this_fix->minipool->offset);
12967 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12970 dump_minipool (last_barrier->insn);
12974 /* From now on we must synthesize any constants that we can't handle
12975 directly. This can happen if the RTL gets split during final
12976 instruction generation. */
12977 after_arm_reorg = 1;
12979 /* Free the minipool memory. */
12980 obstack_free (&minipool_obstack, minipool_startobj);
12983 /* Routines to output assembly language. */
12985 /* If the rtx is the correct value then return the string of the number.
12986 In this way we can ensure that valid double constants are generated even
12987 when cross compiling. */
12989 fp_immediate_constant (rtx x)
12994 if (!fp_consts_inited)
12997 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12998 for (i = 0; i < 8; i++)
12999 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13000 return strings_fp[i];
13002 gcc_unreachable ();
13005 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13006 static const char *
13007 fp_const_from_val (REAL_VALUE_TYPE *r)
13011 if (!fp_consts_inited)
13014 for (i = 0; i < 8; i++)
13015 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13016 return strings_fp[i];
13018 gcc_unreachable ();
13021 /* Output the operands of a LDM/STM instruction to STREAM.
13022 MASK is the ARM register set mask of which only bits 0-15 are important.
13023 REG is the base register, either the frame pointer or the stack pointer,
13024 INSTR is the possibly suffixed load or store instruction.
13025 RFE is nonzero if the instruction should also copy spsr to cpsr. */
13028 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13029 unsigned long mask, int rfe)
13032 bool not_first = FALSE;
13034 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13035 fputc ('\t', stream);
13036 asm_fprintf (stream, instr, reg);
13037 fputc ('{', stream);
13039 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13040 if (mask & (1 << i))
13043 fprintf (stream, ", ");
13045 asm_fprintf (stream, "%r", i);
13050 fprintf (stream, "}^\n");
13052 fprintf (stream, "}\n");
13056 /* Output a FLDMD instruction to STREAM.
13057 BASE if the register containing the address.
13058 REG and COUNT specify the register range.
13059 Extra registers may be added to avoid hardware bugs.
13061 We output FLDMD even for ARMv5 VFP implementations. Although
13062 FLDMD is technically not supported until ARMv6, it is believed
13063 that all VFP implementations support its use in this context. */
13066 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13070 /* Workaround ARM10 VFPr1 bug. */
13071 if (count == 2 && !arm_arch6)
13078 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13079 load into multiple parts if we have to handle more than 16 registers. */
13082 vfp_output_fldmd (stream, base, reg, 16);
13083 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13087 fputc ('\t', stream);
13088 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13090 for (i = reg; i < reg + count; i++)
13093 fputs (", ", stream);
13094 asm_fprintf (stream, "d%d", i);
13096 fputs ("}\n", stream);
13101 /* Output the assembly for a store multiple. */
13104 vfp_output_fstmd (rtx * operands)
13111 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13112 p = strlen (pattern);
13114 gcc_assert (GET_CODE (operands[1]) == REG);
13116 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13117 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13119 p += sprintf (&pattern[p], ", d%d", base + i);
13121 strcpy (&pattern[p], "}");
13123 output_asm_insn (pattern, operands);
13128 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13129 number of bytes pushed. */
13132 vfp_emit_fstmd (int base_reg, int count)
13139 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13140 register pairs are stored by a store multiple insn. We avoid this
13141 by pushing an extra pair. */
13142 if (count == 2 && !arm_arch6)
13144 if (base_reg == LAST_VFP_REGNUM - 3)
13149 /* FSTMD may not store more than 16 doubleword registers at once. Split
13150 larger stores into multiple parts (up to a maximum of two, in
13155 /* NOTE: base_reg is an internal register number, so each D register
13157 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13158 saved += vfp_emit_fstmd (base_reg, 16);
13162 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13163 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13165 reg = gen_rtx_REG (DFmode, base_reg);
13168 XVECEXP (par, 0, 0)
13169 = gen_rtx_SET (VOIDmode,
13172 gen_rtx_PRE_MODIFY (Pmode,
13175 (stack_pointer_rtx,
13178 gen_rtx_UNSPEC (BLKmode,
13179 gen_rtvec (1, reg),
13180 UNSPEC_PUSH_MULT));
13182 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13183 plus_constant (stack_pointer_rtx, -(count * 8)));
13184 RTX_FRAME_RELATED_P (tmp) = 1;
13185 XVECEXP (dwarf, 0, 0) = tmp;
13187 tmp = gen_rtx_SET (VOIDmode,
13188 gen_frame_mem (DFmode, stack_pointer_rtx),
13190 RTX_FRAME_RELATED_P (tmp) = 1;
13191 XVECEXP (dwarf, 0, 1) = tmp;
13193 for (i = 1; i < count; i++)
13195 reg = gen_rtx_REG (DFmode, base_reg);
13197 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13199 tmp = gen_rtx_SET (VOIDmode,
13200 gen_frame_mem (DFmode,
13201 plus_constant (stack_pointer_rtx,
13204 RTX_FRAME_RELATED_P (tmp) = 1;
13205 XVECEXP (dwarf, 0, i + 1) = tmp;
13208 par = emit_insn (par);
13209 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13210 RTX_FRAME_RELATED_P (par) = 1;
13215 /* Emit a call instruction with pattern PAT. ADDR is the address of
13216 the call target. */
13219 arm_emit_call_insn (rtx pat, rtx addr)
13223 insn = emit_call_insn (pat);
13225 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13226 If the call might use such an entry, add a use of the PIC register
13227 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13228 if (TARGET_VXWORKS_RTP
13230 && GET_CODE (addr) == SYMBOL_REF
13231 && (SYMBOL_REF_DECL (addr)
13232 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13233 : !SYMBOL_REF_LOCAL_P (addr)))
13235 require_pic_register ();
13236 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13240 /* Output a 'call' insn. */
13242 output_call (rtx *operands)
13244 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13246 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13247 if (REGNO (operands[0]) == LR_REGNUM)
13249 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13250 output_asm_insn ("mov%?\t%0, %|lr", operands);
13253 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13255 if (TARGET_INTERWORK || arm_arch4t)
13256 output_asm_insn ("bx%?\t%0", operands);
13258 output_asm_insn ("mov%?\t%|pc, %0", operands);
13263 /* Output a 'call' insn that is a reference in memory. This is
13264 disabled for ARMv5 and we prefer a blx instead because otherwise
13265 there's a significant performance overhead. */
13267 output_call_mem (rtx *operands)
13269 gcc_assert (!arm_arch5);
13270 if (TARGET_INTERWORK)
13272 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13273 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13274 output_asm_insn ("bx%?\t%|ip", operands);
13276 else if (regno_use_in (LR_REGNUM, operands[0]))
13278 /* LR is used in the memory address. We load the address in the
13279 first instruction. It's safe to use IP as the target of the
13280 load since the call will kill it anyway. */
13281 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13282 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13284 output_asm_insn ("bx%?\t%|ip", operands);
13286 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13290 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13291 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13298 /* Output a move from arm registers to an fpa registers.
13299 OPERANDS[0] is an fpa register.
13300 OPERANDS[1] is the first registers of an arm register pair. */
13302 output_mov_long_double_fpa_from_arm (rtx *operands)
13304 int arm_reg0 = REGNO (operands[1]);
13307 gcc_assert (arm_reg0 != IP_REGNUM);
13309 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13310 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13311 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13313 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13314 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13319 /* Output a move from an fpa register to arm registers.
13320 OPERANDS[0] is the first registers of an arm register pair.
13321 OPERANDS[1] is an fpa register. */
13323 output_mov_long_double_arm_from_fpa (rtx *operands)
13325 int arm_reg0 = REGNO (operands[0]);
13328 gcc_assert (arm_reg0 != IP_REGNUM);
13330 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13331 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13332 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13334 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13335 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13339 /* Output a move from arm registers to arm registers of a long double
13340 OPERANDS[0] is the destination.
13341 OPERANDS[1] is the source. */
13343 output_mov_long_double_arm_from_arm (rtx *operands)
13345 /* We have to be careful here because the two might overlap. */
13346 int dest_start = REGNO (operands[0]);
13347 int src_start = REGNO (operands[1]);
13351 if (dest_start < src_start)
13353 for (i = 0; i < 3; i++)
13355 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13356 ops[1] = gen_rtx_REG (SImode, src_start + i);
13357 output_asm_insn ("mov%?\t%0, %1", ops);
13362 for (i = 2; i >= 0; i--)
13364 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13365 ops[1] = gen_rtx_REG (SImode, src_start + i);
13366 output_asm_insn ("mov%?\t%0, %1", ops);
13374 arm_emit_movpair (rtx dest, rtx src)
13376 /* If the src is an immediate, simplify it. */
13377 if (CONST_INT_P (src))
13379 HOST_WIDE_INT val = INTVAL (src);
13380 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13381 if ((val >> 16) & 0x0000ffff)
13382 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13384 GEN_INT ((val >> 16) & 0x0000ffff));
13387 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13388 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
13391 /* Output a move from arm registers to an fpa registers.
13392 OPERANDS[0] is an fpa register.
13393 OPERANDS[1] is the first registers of an arm register pair. */
13395 output_mov_double_fpa_from_arm (rtx *operands)
13397 int arm_reg0 = REGNO (operands[1]);
13400 gcc_assert (arm_reg0 != IP_REGNUM);
13402 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13403 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13404 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
13405 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
13409 /* Output a move from an fpa register to arm registers.
13410 OPERANDS[0] is the first registers of an arm register pair.
13411 OPERANDS[1] is an fpa register. */
13413 output_mov_double_arm_from_fpa (rtx *operands)
13415 int arm_reg0 = REGNO (operands[0]);
13418 gcc_assert (arm_reg0 != IP_REGNUM);
13420 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13421 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13422 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
13423 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
13427 /* Output a move between double words. It must be REG<-MEM
13430 output_move_double (rtx *operands, bool emit, int *count)
13432 enum rtx_code code0 = GET_CODE (operands[0]);
13433 enum rtx_code code1 = GET_CODE (operands[1]);
13438 /* The only case when this might happen is when
13439 you are looking at the length of a DImode instruction
13440 that has an invalid constant in it. */
13441 if (code0 == REG && code1 != MEM)
13443 gcc_assert (!emit);
13451 unsigned int reg0 = REGNO (operands[0]);
13453 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
13455 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
13457 switch (GET_CODE (XEXP (operands[1], 0)))
13464 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
13465 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
13467 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13472 gcc_assert (TARGET_LDRD);
13474 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
13482 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
13484 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
13493 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
13495 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
13500 gcc_assert (TARGET_LDRD);
13502 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
13507 /* Autoicrement addressing modes should never have overlapping
13508 base and destination registers, and overlapping index registers
13509 are already prohibited, so this doesn't need to worry about
13511 otherops[0] = operands[0];
13512 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
13513 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
13515 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
13517 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
13519 /* Registers overlap so split out the increment. */
13522 output_asm_insn ("add%?\t%1, %1, %2", otherops);
13523 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
13530 /* Use a single insn if we can.
13531 FIXME: IWMMXT allows offsets larger than ldrd can
13532 handle, fix these up with a pair of ldr. */
13534 || GET_CODE (otherops[2]) != CONST_INT
13535 || (INTVAL (otherops[2]) > -256
13536 && INTVAL (otherops[2]) < 256))
13539 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
13545 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
13546 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13556 /* Use a single insn if we can.
13557 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13558 fix these up with a pair of ldr. */
13560 || GET_CODE (otherops[2]) != CONST_INT
13561 || (INTVAL (otherops[2]) > -256
13562 && INTVAL (otherops[2]) < 256))
13565 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
13571 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
13572 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
13582 /* We might be able to use ldrd %0, %1 here. However the range is
13583 different to ldr/adr, and it is broken on some ARMv7-M
13584 implementations. */
13585 /* Use the second register of the pair to avoid problematic
13587 otherops[1] = operands[1];
13589 output_asm_insn ("adr%?\t%0, %1", otherops);
13590 operands[1] = otherops[0];
13594 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13596 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
13603 /* ??? This needs checking for thumb2. */
13605 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
13606 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
13608 otherops[0] = operands[0];
13609 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
13610 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
13612 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
13614 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13616 switch ((int) INTVAL (otherops[2]))
13620 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
13626 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
13632 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
13636 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
13637 operands[1] = otherops[0];
13639 && (GET_CODE (otherops[2]) == REG
13641 || (GET_CODE (otherops[2]) == CONST_INT
13642 && INTVAL (otherops[2]) > -256
13643 && INTVAL (otherops[2]) < 256)))
13645 if (reg_overlap_mentioned_p (operands[0],
13649 /* Swap base and index registers over to
13650 avoid a conflict. */
13652 otherops[1] = otherops[2];
13655 /* If both registers conflict, it will usually
13656 have been fixed by a splitter. */
13657 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13658 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13662 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13663 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13670 otherops[0] = operands[0];
13672 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13677 if (GET_CODE (otherops[2]) == CONST_INT)
13681 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13682 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13684 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13691 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13697 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13701 return "ldr%(d%)\t%0, [%1]";
13703 return "ldm%(ia%)\t%1, %M0";
13707 otherops[1] = adjust_address (operands[1], SImode, 4);
13708 /* Take care of overlapping base/data reg. */
13709 if (reg_mentioned_p (operands[0], operands[1]))
13713 output_asm_insn ("ldr%?\t%0, %1", otherops);
13714 output_asm_insn ("ldr%?\t%0, %1", operands);
13724 output_asm_insn ("ldr%?\t%0, %1", operands);
13725 output_asm_insn ("ldr%?\t%0, %1", otherops);
13735 /* Constraints should ensure this. */
13736 gcc_assert (code0 == MEM && code1 == REG);
13737 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13739 switch (GET_CODE (XEXP (operands[0], 0)))
13745 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13747 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13752 gcc_assert (TARGET_LDRD);
13754 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13761 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13763 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13771 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13773 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13778 gcc_assert (TARGET_LDRD);
13780 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13785 otherops[0] = operands[1];
13786 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13787 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13789 /* IWMMXT allows offsets larger than ldrd can handle,
13790 fix these up with a pair of ldr. */
13792 && GET_CODE (otherops[2]) == CONST_INT
13793 && (INTVAL(otherops[2]) <= -256
13794 || INTVAL(otherops[2]) >= 256))
13796 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13800 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13801 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13810 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13811 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13817 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13820 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13825 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13830 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13831 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13833 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13837 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13844 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13851 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13856 && (GET_CODE (otherops[2]) == REG
13858 || (GET_CODE (otherops[2]) == CONST_INT
13859 && INTVAL (otherops[2]) > -256
13860 && INTVAL (otherops[2]) < 256)))
13862 otherops[0] = operands[1];
13863 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13865 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13871 otherops[0] = adjust_address (operands[0], SImode, 4);
13872 otherops[1] = operands[1];
13875 output_asm_insn ("str%?\t%1, %0", operands);
13876 output_asm_insn ("str%?\t%H1, %0", otherops);
13887 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13888 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13891 output_move_quad (rtx *operands)
13893 if (REG_P (operands[0]))
13895 /* Load, or reg->reg move. */
13897 if (MEM_P (operands[1]))
13899 switch (GET_CODE (XEXP (operands[1], 0)))
13902 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13907 output_asm_insn ("adr%?\t%0, %1", operands);
13908 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13912 gcc_unreachable ();
13920 gcc_assert (REG_P (operands[1]));
13922 dest = REGNO (operands[0]);
13923 src = REGNO (operands[1]);
13925 /* This seems pretty dumb, but hopefully GCC won't try to do it
13928 for (i = 0; i < 4; i++)
13930 ops[0] = gen_rtx_REG (SImode, dest + i);
13931 ops[1] = gen_rtx_REG (SImode, src + i);
13932 output_asm_insn ("mov%?\t%0, %1", ops);
13935 for (i = 3; i >= 0; i--)
13937 ops[0] = gen_rtx_REG (SImode, dest + i);
13938 ops[1] = gen_rtx_REG (SImode, src + i);
13939 output_asm_insn ("mov%?\t%0, %1", ops);
13945 gcc_assert (MEM_P (operands[0]));
13946 gcc_assert (REG_P (operands[1]));
13947 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13949 switch (GET_CODE (XEXP (operands[0], 0)))
13952 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13956 gcc_unreachable ();
13963 /* Output a VFP load or store instruction. */
13966 output_move_vfp (rtx *operands)
13968 rtx reg, mem, addr, ops[2];
13969 int load = REG_P (operands[0]);
13970 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13971 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13974 enum machine_mode mode;
13976 reg = operands[!load];
13977 mem = operands[load];
13979 mode = GET_MODE (reg);
13981 gcc_assert (REG_P (reg));
13982 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13983 gcc_assert (mode == SFmode
13987 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13988 gcc_assert (MEM_P (mem));
13990 addr = XEXP (mem, 0);
13992 switch (GET_CODE (addr))
13995 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13996 ops[0] = XEXP (addr, 0);
14001 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14002 ops[0] = XEXP (addr, 0);
14007 templ = "f%s%c%%?\t%%%s0, %%1%s";
14013 sprintf (buff, templ,
14014 load ? "ld" : "st",
14017 integer_p ? "\t%@ int" : "");
14018 output_asm_insn (buff, ops);
14023 /* Output a Neon quad-word load or store, or a load or store for
14024 larger structure modes.
14026 WARNING: The ordering of elements is weird in big-endian mode,
14027 because we use VSTM, as required by the EABI. GCC RTL defines
14028 element ordering based on in-memory order. This can be differ
14029 from the architectural ordering of elements within a NEON register.
14030 The intrinsics defined in arm_neon.h use the NEON register element
14031 ordering, not the GCC RTL element ordering.
14033 For example, the in-memory ordering of a big-endian a quadword
14034 vector with 16-bit elements when stored from register pair {d0,d1}
14035 will be (lowest address first, d0[N] is NEON register element N):
14037 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14039 When necessary, quadword registers (dN, dN+1) are moved to ARM
14040 registers from rN in the order:
14042 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14044 So that STM/LDM can be used on vectors in ARM registers, and the
14045 same memory layout will result as if VSTM/VLDM were used. */
14048 output_move_neon (rtx *operands)
14050 rtx reg, mem, addr, ops[2];
14051 int regno, load = REG_P (operands[0]);
14054 enum machine_mode mode;
14056 reg = operands[!load];
14057 mem = operands[load];
14059 mode = GET_MODE (reg);
14061 gcc_assert (REG_P (reg));
14062 regno = REGNO (reg);
14063 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14064 || NEON_REGNO_OK_FOR_QUAD (regno));
14065 gcc_assert (VALID_NEON_DREG_MODE (mode)
14066 || VALID_NEON_QREG_MODE (mode)
14067 || VALID_NEON_STRUCT_MODE (mode));
14068 gcc_assert (MEM_P (mem));
14070 addr = XEXP (mem, 0);
14072 /* Strip off const from addresses like (const (plus (...))). */
14073 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14074 addr = XEXP (addr, 0);
14076 switch (GET_CODE (addr))
14079 templ = "v%smia%%?\t%%0!, %%h1";
14080 ops[0] = XEXP (addr, 0);
14085 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14086 templ = "v%smdb%%?\t%%0!, %%h1";
14087 ops[0] = XEXP (addr, 0);
14092 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14093 gcc_unreachable ();
14098 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14101 for (i = 0; i < nregs; i++)
14103 /* We're only using DImode here because it's a convenient size. */
14104 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14105 ops[1] = adjust_address (mem, DImode, 8 * i);
14106 if (reg_overlap_mentioned_p (ops[0], mem))
14108 gcc_assert (overlap == -1);
14113 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14114 output_asm_insn (buff, ops);
14119 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14120 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14121 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14122 output_asm_insn (buff, ops);
14129 templ = "v%smia%%?\t%%m0, %%h1";
14134 sprintf (buff, templ, load ? "ld" : "st");
14135 output_asm_insn (buff, ops);
14140 /* Compute and return the length of neon_mov<mode>, where <mode> is
14141 one of VSTRUCT modes: EI, OI, CI or XI. */
14143 arm_attr_length_move_neon (rtx insn)
14145 rtx reg, mem, addr;
14147 enum machine_mode mode;
14149 extract_insn_cached (insn);
14151 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14153 mode = GET_MODE (recog_data.operand[0]);
14164 gcc_unreachable ();
14168 load = REG_P (recog_data.operand[0]);
14169 reg = recog_data.operand[!load];
14170 mem = recog_data.operand[load];
14172 gcc_assert (MEM_P (mem));
14174 mode = GET_MODE (reg);
14175 addr = XEXP (mem, 0);
14177 /* Strip off const from addresses like (const (plus (...))). */
14178 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14179 addr = XEXP (addr, 0);
14181 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14183 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14190 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14194 arm_address_offset_is_imm (rtx insn)
14198 extract_insn_cached (insn);
14200 if (REG_P (recog_data.operand[0]))
14203 mem = recog_data.operand[0];
14205 gcc_assert (MEM_P (mem));
14207 addr = XEXP (mem, 0);
14209 if (GET_CODE (addr) == REG
14210 || (GET_CODE (addr) == PLUS
14211 && GET_CODE (XEXP (addr, 0)) == REG
14212 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14218 /* Output an ADD r, s, #n where n may be too big for one instruction.
14219 If adding zero to one register, output nothing. */
14221 output_add_immediate (rtx *operands)
14223 HOST_WIDE_INT n = INTVAL (operands[2]);
14225 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14228 output_multi_immediate (operands,
14229 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14232 output_multi_immediate (operands,
14233 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14240 /* Output a multiple immediate operation.
14241 OPERANDS is the vector of operands referred to in the output patterns.
14242 INSTR1 is the output pattern to use for the first constant.
14243 INSTR2 is the output pattern to use for subsequent constants.
14244 IMMED_OP is the index of the constant slot in OPERANDS.
14245 N is the constant value. */
14246 static const char *
14247 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14248 int immed_op, HOST_WIDE_INT n)
14250 #if HOST_BITS_PER_WIDE_INT > 32
14256 /* Quick and easy output. */
14257 operands[immed_op] = const0_rtx;
14258 output_asm_insn (instr1, operands);
14263 const char * instr = instr1;
14265 /* Note that n is never zero here (which would give no output). */
14266 for (i = 0; i < 32; i += 2)
14270 operands[immed_op] = GEN_INT (n & (255 << i));
14271 output_asm_insn (instr, operands);
14281 /* Return the name of a shifter operation. */
14282 static const char *
14283 arm_shift_nmem(enum rtx_code code)
14288 return ARM_LSL_NAME;
14304 /* Return the appropriate ARM instruction for the operation code.
14305 The returned result should not be overwritten. OP is the rtx of the
14306 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14309 arithmetic_instr (rtx op, int shift_first_arg)
14311 switch (GET_CODE (op))
14317 return shift_first_arg ? "rsb" : "sub";
14332 return arm_shift_nmem(GET_CODE(op));
14335 gcc_unreachable ();
14339 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14340 for the operation code. The returned result should not be overwritten.
14341 OP is the rtx code of the shift.
14342 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
14344 static const char *
14345 shift_op (rtx op, HOST_WIDE_INT *amountp)
14348 enum rtx_code code = GET_CODE (op);
14350 switch (GET_CODE (XEXP (op, 1)))
14358 *amountp = INTVAL (XEXP (op, 1));
14362 gcc_unreachable ();
14368 gcc_assert (*amountp != -1);
14369 *amountp = 32 - *amountp;
14372 /* Fall through. */
14378 mnem = arm_shift_nmem(code);
14382 /* We never have to worry about the amount being other than a
14383 power of 2, since this case can never be reloaded from a reg. */
14384 gcc_assert (*amountp != -1);
14385 *amountp = int_log2 (*amountp);
14386 return ARM_LSL_NAME;
14389 gcc_unreachable ();
14392 if (*amountp != -1)
14394 /* This is not 100% correct, but follows from the desire to merge
14395 multiplication by a power of 2 with the recognizer for a
14396 shift. >=32 is not a valid shift for "lsl", so we must try and
14397 output a shift that produces the correct arithmetical result.
14398 Using lsr #32 is identical except for the fact that the carry bit
14399 is not set correctly if we set the flags; but we never use the
14400 carry bit from such an operation, so we can ignore that. */
14401 if (code == ROTATERT)
14402 /* Rotate is just modulo 32. */
14404 else if (*amountp != (*amountp & 31))
14406 if (code == ASHIFT)
14411 /* Shifts of 0 are no-ops. */
14419 /* Obtain the shift from the POWER of two. */
14421 static HOST_WIDE_INT
14422 int_log2 (HOST_WIDE_INT power)
14424 HOST_WIDE_INT shift = 0;
14426 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14428 gcc_assert (shift <= 31);
14435 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14436 because /bin/as is horribly restrictive. The judgement about
14437 whether or not each character is 'printable' (and can be output as
14438 is) or not (and must be printed with an octal escape) must be made
14439 with reference to the *host* character set -- the situation is
14440 similar to that discussed in the comments above pp_c_char in
14441 c-pretty-print.c. */
14443 #define MAX_ASCII_LEN 51
14446 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
14449 int len_so_far = 0;
14451 fputs ("\t.ascii\t\"", stream);
14453 for (i = 0; i < len; i++)
14457 if (len_so_far >= MAX_ASCII_LEN)
14459 fputs ("\"\n\t.ascii\t\"", stream);
14465 if (c == '\\' || c == '\"')
14467 putc ('\\', stream);
14475 fprintf (stream, "\\%03o", c);
14480 fputs ("\"\n", stream);
14483 /* Compute the register save mask for registers 0 through 12
14484 inclusive. This code is used by arm_compute_save_reg_mask. */
14486 static unsigned long
14487 arm_compute_save_reg0_reg12_mask (void)
14489 unsigned long func_type = arm_current_func_type ();
14490 unsigned long save_reg_mask = 0;
14493 if (IS_INTERRUPT (func_type))
14495 unsigned int max_reg;
14496 /* Interrupt functions must not corrupt any registers,
14497 even call clobbered ones. If this is a leaf function
14498 we can just examine the registers used by the RTL, but
14499 otherwise we have to assume that whatever function is
14500 called might clobber anything, and so we have to save
14501 all the call-clobbered registers as well. */
14502 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
14503 /* FIQ handlers have registers r8 - r12 banked, so
14504 we only need to check r0 - r7, Normal ISRs only
14505 bank r14 and r15, so we must check up to r12.
14506 r13 is the stack pointer which is always preserved,
14507 so we do not need to consider it here. */
14512 for (reg = 0; reg <= max_reg; reg++)
14513 if (df_regs_ever_live_p (reg)
14514 || (! current_function_is_leaf && call_used_regs[reg]))
14515 save_reg_mask |= (1 << reg);
14517 /* Also save the pic base register if necessary. */
14519 && !TARGET_SINGLE_PIC_BASE
14520 && arm_pic_register != INVALID_REGNUM
14521 && crtl->uses_pic_offset_table)
14522 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14524 else if (IS_VOLATILE(func_type))
14526 /* For noreturn functions we historically omitted register saves
14527 altogether. However this really messes up debugging. As a
14528 compromise save just the frame pointers. Combined with the link
14529 register saved elsewhere this should be sufficient to get
14531 if (frame_pointer_needed)
14532 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14533 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
14534 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14535 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
14536 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
14540 /* In the normal case we only need to save those registers
14541 which are call saved and which are used by this function. */
14542 for (reg = 0; reg <= 11; reg++)
14543 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14544 save_reg_mask |= (1 << reg);
14546 /* Handle the frame pointer as a special case. */
14547 if (frame_pointer_needed)
14548 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
14550 /* If we aren't loading the PIC register,
14551 don't stack it even though it may be live. */
14553 && !TARGET_SINGLE_PIC_BASE
14554 && arm_pic_register != INVALID_REGNUM
14555 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
14556 || crtl->uses_pic_offset_table))
14557 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14559 /* The prologue will copy SP into R0, so save it. */
14560 if (IS_STACKALIGN (func_type))
14561 save_reg_mask |= 1;
14564 /* Save registers so the exception handler can modify them. */
14565 if (crtl->calls_eh_return)
14571 reg = EH_RETURN_DATA_REGNO (i);
14572 if (reg == INVALID_REGNUM)
14574 save_reg_mask |= 1 << reg;
14578 return save_reg_mask;
14582 /* Compute the number of bytes used to store the static chain register on the
14583 stack, above the stack frame. We need to know this accurately to get the
14584 alignment of the rest of the stack frame correct. */
14586 static int arm_compute_static_chain_stack_bytes (void)
14588 unsigned long func_type = arm_current_func_type ();
14589 int static_chain_stack_bytes = 0;
14591 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
14592 IS_NESTED (func_type) &&
14593 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
14594 static_chain_stack_bytes = 4;
14596 return static_chain_stack_bytes;
14600 /* Compute a bit mask of which registers need to be
14601 saved on the stack for the current function.
14602 This is used by arm_get_frame_offsets, which may add extra registers. */
14604 static unsigned long
14605 arm_compute_save_reg_mask (void)
14607 unsigned int save_reg_mask = 0;
14608 unsigned long func_type = arm_current_func_type ();
14611 if (IS_NAKED (func_type))
14612 /* This should never really happen. */
14615 /* If we are creating a stack frame, then we must save the frame pointer,
14616 IP (which will hold the old stack pointer), LR and the PC. */
14617 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14619 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
14622 | (1 << PC_REGNUM);
14624 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
14626 /* Decide if we need to save the link register.
14627 Interrupt routines have their own banked link register,
14628 so they never need to save it.
14629 Otherwise if we do not use the link register we do not need to save
14630 it. If we are pushing other registers onto the stack however, we
14631 can save an instruction in the epilogue by pushing the link register
14632 now and then popping it back into the PC. This incurs extra memory
14633 accesses though, so we only do it when optimizing for size, and only
14634 if we know that we will not need a fancy return sequence. */
14635 if (df_regs_ever_live_p (LR_REGNUM)
14638 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14639 && !crtl->calls_eh_return))
14640 save_reg_mask |= 1 << LR_REGNUM;
14642 if (cfun->machine->lr_save_eliminated)
14643 save_reg_mask &= ~ (1 << LR_REGNUM);
14645 if (TARGET_REALLY_IWMMXT
14646 && ((bit_count (save_reg_mask)
14647 + ARM_NUM_INTS (crtl->args.pretend_args_size +
14648 arm_compute_static_chain_stack_bytes())
14651 /* The total number of registers that are going to be pushed
14652 onto the stack is odd. We need to ensure that the stack
14653 is 64-bit aligned before we start to save iWMMXt registers,
14654 and also before we start to create locals. (A local variable
14655 might be a double or long long which we will load/store using
14656 an iWMMXt instruction). Therefore we need to push another
14657 ARM register, so that the stack will be 64-bit aligned. We
14658 try to avoid using the arg registers (r0 -r3) as they might be
14659 used to pass values in a tail call. */
14660 for (reg = 4; reg <= 12; reg++)
14661 if ((save_reg_mask & (1 << reg)) == 0)
14665 save_reg_mask |= (1 << reg);
14668 cfun->machine->sibcall_blocked = 1;
14669 save_reg_mask |= (1 << 3);
14673 /* We may need to push an additional register for use initializing the
14674 PIC base register. */
14675 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
14676 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
14678 reg = thumb_find_work_register (1 << 4);
14679 if (!call_used_regs[reg])
14680 save_reg_mask |= (1 << reg);
14683 return save_reg_mask;
14687 /* Compute a bit mask of which registers need to be
14688 saved on the stack for the current function. */
14689 static unsigned long
14690 thumb1_compute_save_reg_mask (void)
14692 unsigned long mask;
14696 for (reg = 0; reg < 12; reg ++)
14697 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14701 && !TARGET_SINGLE_PIC_BASE
14702 && arm_pic_register != INVALID_REGNUM
14703 && crtl->uses_pic_offset_table)
14704 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
14706 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14707 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
14708 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
14710 /* LR will also be pushed if any lo regs are pushed. */
14711 if (mask & 0xff || thumb_force_lr_save ())
14712 mask |= (1 << LR_REGNUM);
14714 /* Make sure we have a low work register if we need one.
14715 We will need one if we are going to push a high register,
14716 but we are not currently intending to push a low register. */
14717 if ((mask & 0xff) == 0
14718 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14720 /* Use thumb_find_work_register to choose which register
14721 we will use. If the register is live then we will
14722 have to push it. Use LAST_LO_REGNUM as our fallback
14723 choice for the register to select. */
14724 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14725 /* Make sure the register returned by thumb_find_work_register is
14726 not part of the return value. */
14727 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14728 reg = LAST_LO_REGNUM;
14730 if (! call_used_regs[reg])
14734 /* The 504 below is 8 bytes less than 512 because there are two possible
14735 alignment words. We can't tell here if they will be present or not so we
14736 have to play it safe and assume that they are. */
14737 if ((CALLER_INTERWORKING_SLOT_SIZE +
14738 ROUND_UP_WORD (get_frame_size ()) +
14739 crtl->outgoing_args_size) >= 504)
14741 /* This is the same as the code in thumb1_expand_prologue() which
14742 determines which register to use for stack decrement. */
14743 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14744 if (mask & (1 << reg))
14747 if (reg > LAST_LO_REGNUM)
14749 /* Make sure we have a register available for stack decrement. */
14750 mask |= 1 << LAST_LO_REGNUM;
14758 /* Return the number of bytes required to save VFP registers. */
14760 arm_get_vfp_saved_size (void)
14762 unsigned int regno;
14767 /* Space for saved VFP registers. */
14768 if (TARGET_HARD_FLOAT && TARGET_VFP)
14771 for (regno = FIRST_VFP_REGNUM;
14772 regno < LAST_VFP_REGNUM;
14775 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14776 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14780 /* Workaround ARM10 VFPr1 bug. */
14781 if (count == 2 && !arm_arch6)
14783 saved += count * 8;
14792 if (count == 2 && !arm_arch6)
14794 saved += count * 8;
14801 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14802 everything bar the final return instruction. */
14804 output_return_instruction (rtx operand, int really_return, int reverse)
14806 char conditional[10];
14809 unsigned long live_regs_mask;
14810 unsigned long func_type;
14811 arm_stack_offsets *offsets;
14813 func_type = arm_current_func_type ();
14815 if (IS_NAKED (func_type))
14818 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14820 /* If this function was declared non-returning, and we have
14821 found a tail call, then we have to trust that the called
14822 function won't return. */
14827 /* Otherwise, trap an attempted return by aborting. */
14829 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14831 assemble_external_libcall (ops[1]);
14832 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14838 gcc_assert (!cfun->calls_alloca || really_return);
14840 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14842 cfun->machine->return_used_this_function = 1;
14844 offsets = arm_get_frame_offsets ();
14845 live_regs_mask = offsets->saved_regs_mask;
14847 if (live_regs_mask)
14849 const char * return_reg;
14851 /* If we do not have any special requirements for function exit
14852 (e.g. interworking) then we can load the return address
14853 directly into the PC. Otherwise we must load it into LR. */
14855 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14856 return_reg = reg_names[PC_REGNUM];
14858 return_reg = reg_names[LR_REGNUM];
14860 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14862 /* There are three possible reasons for the IP register
14863 being saved. 1) a stack frame was created, in which case
14864 IP contains the old stack pointer, or 2) an ISR routine
14865 corrupted it, or 3) it was saved to align the stack on
14866 iWMMXt. In case 1, restore IP into SP, otherwise just
14868 if (frame_pointer_needed)
14870 live_regs_mask &= ~ (1 << IP_REGNUM);
14871 live_regs_mask |= (1 << SP_REGNUM);
14874 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14877 /* On some ARM architectures it is faster to use LDR rather than
14878 LDM to load a single register. On other architectures, the
14879 cost is the same. In 26 bit mode, or for exception handlers,
14880 we have to use LDM to load the PC so that the CPSR is also
14882 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14883 if (live_regs_mask == (1U << reg))
14886 if (reg <= LAST_ARM_REGNUM
14887 && (reg != LR_REGNUM
14889 || ! IS_INTERRUPT (func_type)))
14891 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14892 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14899 /* Generate the load multiple instruction to restore the
14900 registers. Note we can get here, even if
14901 frame_pointer_needed is true, but only if sp already
14902 points to the base of the saved core registers. */
14903 if (live_regs_mask & (1 << SP_REGNUM))
14905 unsigned HOST_WIDE_INT stack_adjust;
14907 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14908 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14910 if (stack_adjust && arm_arch5 && TARGET_ARM)
14911 if (TARGET_UNIFIED_ASM)
14912 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14914 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14917 /* If we can't use ldmib (SA110 bug),
14918 then try to pop r3 instead. */
14920 live_regs_mask |= 1 << 3;
14922 if (TARGET_UNIFIED_ASM)
14923 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14925 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14929 if (TARGET_UNIFIED_ASM)
14930 sprintf (instr, "pop%s\t{", conditional);
14932 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14934 p = instr + strlen (instr);
14936 for (reg = 0; reg <= SP_REGNUM; reg++)
14937 if (live_regs_mask & (1 << reg))
14939 int l = strlen (reg_names[reg]);
14945 memcpy (p, ", ", 2);
14949 memcpy (p, "%|", 2);
14950 memcpy (p + 2, reg_names[reg], l);
14954 if (live_regs_mask & (1 << LR_REGNUM))
14956 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14957 /* If returning from an interrupt, restore the CPSR. */
14958 if (IS_INTERRUPT (func_type))
14965 output_asm_insn (instr, & operand);
14967 /* See if we need to generate an extra instruction to
14968 perform the actual function return. */
14970 && func_type != ARM_FT_INTERWORKED
14971 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14973 /* The return has already been handled
14974 by loading the LR into the PC. */
14981 switch ((int) ARM_FUNC_TYPE (func_type))
14985 /* ??? This is wrong for unified assembly syntax. */
14986 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14989 case ARM_FT_INTERWORKED:
14990 sprintf (instr, "bx%s\t%%|lr", conditional);
14993 case ARM_FT_EXCEPTION:
14994 /* ??? This is wrong for unified assembly syntax. */
14995 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14999 /* Use bx if it's available. */
15000 if (arm_arch5 || arm_arch4t)
15001 sprintf (instr, "bx%s\t%%|lr", conditional);
15003 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15007 output_asm_insn (instr, & operand);
15013 /* Write the function name into the code section, directly preceding
15014 the function prologue.
15016 Code will be output similar to this:
15018 .ascii "arm_poke_function_name", 0
15021 .word 0xff000000 + (t1 - t0)
15022 arm_poke_function_name
15024 stmfd sp!, {fp, ip, lr, pc}
15027 When performing a stack backtrace, code can inspect the value
15028 of 'pc' stored at 'fp' + 0. If the trace function then looks
15029 at location pc - 12 and the top 8 bits are set, then we know
15030 that there is a function name embedded immediately preceding this
15031 location and has length ((pc[-3]) & 0xff000000).
15033 We assume that pc is declared as a pointer to an unsigned long.
15035 It is of no benefit to output the function name if we are assembling
15036 a leaf function. These function types will not contain a stack
15037 backtrace structure, therefore it is not possible to determine the
15040 arm_poke_function_name (FILE *stream, const char *name)
15042 unsigned long alignlength;
15043 unsigned long length;
15046 length = strlen (name) + 1;
15047 alignlength = ROUND_UP_WORD (length);
15049 ASM_OUTPUT_ASCII (stream, name, length);
15050 ASM_OUTPUT_ALIGN (stream, 2);
15051 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15052 assemble_aligned_integer (UNITS_PER_WORD, x);
15055 /* Place some comments into the assembler stream
15056 describing the current function. */
15058 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15060 unsigned long func_type;
15062 /* ??? Do we want to print some of the below anyway? */
15066 /* Sanity check. */
15067 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15069 func_type = arm_current_func_type ();
15071 switch ((int) ARM_FUNC_TYPE (func_type))
15074 case ARM_FT_NORMAL:
15076 case ARM_FT_INTERWORKED:
15077 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15080 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15083 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15085 case ARM_FT_EXCEPTION:
15086 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15090 if (IS_NAKED (func_type))
15091 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15093 if (IS_VOLATILE (func_type))
15094 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15096 if (IS_NESTED (func_type))
15097 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15098 if (IS_STACKALIGN (func_type))
15099 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15101 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15103 crtl->args.pretend_args_size, frame_size);
15105 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15106 frame_pointer_needed,
15107 cfun->machine->uses_anonymous_args);
15109 if (cfun->machine->lr_save_eliminated)
15110 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15112 if (crtl->calls_eh_return)
15113 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15118 arm_output_epilogue (rtx sibling)
15121 unsigned long saved_regs_mask;
15122 unsigned long func_type;
15123 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15124 frame that is $fp + 4 for a non-variadic function. */
15125 int floats_offset = 0;
15127 FILE * f = asm_out_file;
15128 unsigned int lrm_count = 0;
15129 int really_return = (sibling == NULL);
15131 arm_stack_offsets *offsets;
15133 /* If we have already generated the return instruction
15134 then it is futile to generate anything else. */
15135 if (use_return_insn (FALSE, sibling) &&
15136 (cfun->machine->return_used_this_function != 0))
15139 func_type = arm_current_func_type ();
15141 if (IS_NAKED (func_type))
15142 /* Naked functions don't have epilogues. */
15145 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15149 /* A volatile function should never return. Call abort. */
15150 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15151 assemble_external_libcall (op);
15152 output_asm_insn ("bl\t%a0", &op);
15157 /* If we are throwing an exception, then we really must be doing a
15158 return, so we can't tail-call. */
15159 gcc_assert (!crtl->calls_eh_return || really_return);
15161 offsets = arm_get_frame_offsets ();
15162 saved_regs_mask = offsets->saved_regs_mask;
15165 lrm_count = bit_count (saved_regs_mask);
15167 floats_offset = offsets->saved_args;
15168 /* Compute how far away the floats will be. */
15169 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15170 if (saved_regs_mask & (1 << reg))
15171 floats_offset += 4;
15173 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15175 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15176 int vfp_offset = offsets->frame;
15178 if (TARGET_FPA_EMU2)
15180 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15181 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15183 floats_offset += 12;
15184 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15185 reg, FP_REGNUM, floats_offset - vfp_offset);
15190 start_reg = LAST_FPA_REGNUM;
15192 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15194 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15196 floats_offset += 12;
15198 /* We can't unstack more than four registers at once. */
15199 if (start_reg - reg == 3)
15201 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15202 reg, FP_REGNUM, floats_offset - vfp_offset);
15203 start_reg = reg - 1;
15208 if (reg != start_reg)
15209 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15210 reg + 1, start_reg - reg,
15211 FP_REGNUM, floats_offset - vfp_offset);
15212 start_reg = reg - 1;
15216 /* Just in case the last register checked also needs unstacking. */
15217 if (reg != start_reg)
15218 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15219 reg + 1, start_reg - reg,
15220 FP_REGNUM, floats_offset - vfp_offset);
15223 if (TARGET_HARD_FLOAT && TARGET_VFP)
15227 /* The fldmd insns do not have base+offset addressing
15228 modes, so we use IP to hold the address. */
15229 saved_size = arm_get_vfp_saved_size ();
15231 if (saved_size > 0)
15233 floats_offset += saved_size;
15234 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15235 FP_REGNUM, floats_offset - vfp_offset);
15237 start_reg = FIRST_VFP_REGNUM;
15238 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15240 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15241 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15243 if (start_reg != reg)
15244 vfp_output_fldmd (f, IP_REGNUM,
15245 (start_reg - FIRST_VFP_REGNUM) / 2,
15246 (reg - start_reg) / 2);
15247 start_reg = reg + 2;
15250 if (start_reg != reg)
15251 vfp_output_fldmd (f, IP_REGNUM,
15252 (start_reg - FIRST_VFP_REGNUM) / 2,
15253 (reg - start_reg) / 2);
15258 /* The frame pointer is guaranteed to be non-double-word aligned.
15259 This is because it is set to (old_stack_pointer - 4) and the
15260 old_stack_pointer was double word aligned. Thus the offset to
15261 the iWMMXt registers to be loaded must also be non-double-word
15262 sized, so that the resultant address *is* double-word aligned.
15263 We can ignore floats_offset since that was already included in
15264 the live_regs_mask. */
15265 lrm_count += (lrm_count % 2 ? 2 : 1);
15267 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15268 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15270 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15271 reg, FP_REGNUM, lrm_count * 4);
15276 /* saved_regs_mask should contain the IP, which at the time of stack
15277 frame generation actually contains the old stack pointer. So a
15278 quick way to unwind the stack is just pop the IP register directly
15279 into the stack pointer. */
15280 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15281 saved_regs_mask &= ~ (1 << IP_REGNUM);
15282 saved_regs_mask |= (1 << SP_REGNUM);
15284 /* There are two registers left in saved_regs_mask - LR and PC. We
15285 only need to restore the LR register (the return address), but to
15286 save time we can load it directly into the PC, unless we need a
15287 special function exit sequence, or we are not really returning. */
15289 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15290 && !crtl->calls_eh_return)
15291 /* Delete the LR from the register mask, so that the LR on
15292 the stack is loaded into the PC in the register mask. */
15293 saved_regs_mask &= ~ (1 << LR_REGNUM);
15295 saved_regs_mask &= ~ (1 << PC_REGNUM);
15297 /* We must use SP as the base register, because SP is one of the
15298 registers being restored. If an interrupt or page fault
15299 happens in the ldm instruction, the SP might or might not
15300 have been restored. That would be bad, as then SP will no
15301 longer indicate the safe area of stack, and we can get stack
15302 corruption. Using SP as the base register means that it will
15303 be reset correctly to the original value, should an interrupt
15304 occur. If the stack pointer already points at the right
15305 place, then omit the subtraction. */
15306 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15307 || cfun->calls_alloca)
15308 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15309 4 * bit_count (saved_regs_mask));
15310 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15312 if (IS_INTERRUPT (func_type))
15313 /* Interrupt handlers will have pushed the
15314 IP onto the stack, so restore it now. */
15315 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15319 /* This branch is executed for ARM mode (non-apcs frames) and
15320 Thumb-2 mode. Frame layout is essentially the same for those
15321 cases, except that in ARM mode frame pointer points to the
15322 first saved register, while in Thumb-2 mode the frame pointer points
15323 to the last saved register.
15325 It is possible to make frame pointer point to last saved
15326 register in both cases, and remove some conditionals below.
15327 That means that fp setup in prologue would be just "mov fp, sp"
15328 and sp restore in epilogue would be just "mov sp, fp", whereas
15329 now we have to use add/sub in those cases. However, the value
15330 of that would be marginal, as both mov and add/sub are 32-bit
15331 in ARM mode, and it would require extra conditionals
15332 in arm_expand_prologue to distingish ARM-apcs-frame case
15333 (where frame pointer is required to point at first register)
15334 and ARM-non-apcs-frame. Therefore, such change is postponed
15335 until real need arise. */
15336 unsigned HOST_WIDE_INT amount;
15338 /* Restore stack pointer if necessary. */
15339 if (TARGET_ARM && frame_pointer_needed)
15341 operands[0] = stack_pointer_rtx;
15342 operands[1] = hard_frame_pointer_rtx;
15344 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15345 output_add_immediate (operands);
15349 if (frame_pointer_needed)
15351 /* For Thumb-2 restore sp from the frame pointer.
15352 Operand restrictions mean we have to incrememnt FP, then copy
15354 amount = offsets->locals_base - offsets->saved_regs;
15355 operands[0] = hard_frame_pointer_rtx;
15359 unsigned long count;
15360 operands[0] = stack_pointer_rtx;
15361 amount = offsets->outgoing_args - offsets->saved_regs;
15362 /* pop call clobbered registers if it avoids a
15363 separate stack adjustment. */
15364 count = offsets->saved_regs - offsets->saved_args;
15367 && !crtl->calls_eh_return
15368 && bit_count(saved_regs_mask) * 4 == count
15369 && !IS_INTERRUPT (func_type)
15370 && !IS_STACKALIGN (func_type)
15371 && !crtl->tail_call_emit)
15373 unsigned long mask;
15374 /* Preserve return values, of any size. */
15375 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15377 mask &= ~saved_regs_mask;
15379 while (bit_count (mask) * 4 > amount)
15381 while ((mask & (1 << reg)) == 0)
15383 mask &= ~(1 << reg);
15385 if (bit_count (mask) * 4 == amount) {
15387 saved_regs_mask |= mask;
15394 operands[1] = operands[0];
15395 operands[2] = GEN_INT (amount);
15396 output_add_immediate (operands);
15398 if (frame_pointer_needed)
15399 asm_fprintf (f, "\tmov\t%r, %r\n",
15400 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
15403 if (TARGET_FPA_EMU2)
15405 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15406 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15407 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
15412 start_reg = FIRST_FPA_REGNUM;
15414 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
15416 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15418 if (reg - start_reg == 3)
15420 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
15421 start_reg, SP_REGNUM);
15422 start_reg = reg + 1;
15427 if (reg != start_reg)
15428 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15429 start_reg, reg - start_reg,
15432 start_reg = reg + 1;
15436 /* Just in case the last register checked also needs unstacking. */
15437 if (reg != start_reg)
15438 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
15439 start_reg, reg - start_reg, SP_REGNUM);
15442 if (TARGET_HARD_FLOAT && TARGET_VFP)
15444 int end_reg = LAST_VFP_REGNUM + 1;
15446 /* Scan the registers in reverse order. We need to match
15447 any groupings made in the prologue and generate matching
15449 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
15451 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15452 && (!df_regs_ever_live_p (reg + 1)
15453 || call_used_regs[reg + 1]))
15455 if (end_reg > reg + 2)
15456 vfp_output_fldmd (f, SP_REGNUM,
15457 (reg + 2 - FIRST_VFP_REGNUM) / 2,
15458 (end_reg - (reg + 2)) / 2);
15462 if (end_reg > reg + 2)
15463 vfp_output_fldmd (f, SP_REGNUM, 0,
15464 (end_reg - (reg + 2)) / 2);
15468 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
15469 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15470 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
15472 /* If we can, restore the LR into the PC. */
15473 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
15474 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
15475 && !IS_STACKALIGN (func_type)
15477 && crtl->args.pretend_args_size == 0
15478 && saved_regs_mask & (1 << LR_REGNUM)
15479 && !crtl->calls_eh_return)
15481 saved_regs_mask &= ~ (1 << LR_REGNUM);
15482 saved_regs_mask |= (1 << PC_REGNUM);
15483 rfe = IS_INTERRUPT (func_type);
15488 /* Load the registers off the stack. If we only have one register
15489 to load use the LDR instruction - it is faster. For Thumb-2
15490 always use pop and the assembler will pick the best instruction.*/
15491 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
15492 && !IS_INTERRUPT(func_type))
15494 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
15496 else if (saved_regs_mask)
15498 if (saved_regs_mask & (1 << SP_REGNUM))
15499 /* Note - write back to the stack register is not enabled
15500 (i.e. "ldmfd sp!..."). We know that the stack pointer is
15501 in the list of registers and if we add writeback the
15502 instruction becomes UNPREDICTABLE. */
15503 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
15505 else if (TARGET_ARM)
15506 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
15509 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
15512 if (crtl->args.pretend_args_size)
15514 /* Unwind the pre-pushed regs. */
15515 operands[0] = operands[1] = stack_pointer_rtx;
15516 operands[2] = GEN_INT (crtl->args.pretend_args_size);
15517 output_add_immediate (operands);
15521 /* We may have already restored PC directly from the stack. */
15522 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
15525 /* Stack adjustment for exception handler. */
15526 if (crtl->calls_eh_return)
15527 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
15528 ARM_EH_STACKADJ_REGNUM);
15530 /* Generate the return instruction. */
15531 switch ((int) ARM_FUNC_TYPE (func_type))
15535 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
15538 case ARM_FT_EXCEPTION:
15539 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15542 case ARM_FT_INTERWORKED:
15543 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15547 if (IS_STACKALIGN (func_type))
15549 /* See comment in arm_expand_prologue. */
15550 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
15552 if (arm_arch5 || arm_arch4t)
15553 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
15555 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
15563 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15564 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15566 arm_stack_offsets *offsets;
15572 /* Emit any call-via-reg trampolines that are needed for v4t support
15573 of call_reg and call_value_reg type insns. */
15574 for (regno = 0; regno < LR_REGNUM; regno++)
15576 rtx label = cfun->machine->call_via[regno];
15580 switch_to_section (function_section (current_function_decl));
15581 targetm.asm_out.internal_label (asm_out_file, "L",
15582 CODE_LABEL_NUMBER (label));
15583 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15587 /* ??? Probably not safe to set this here, since it assumes that a
15588 function will be emitted as assembly immediately after we generate
15589 RTL for it. This does not happen for inline functions. */
15590 cfun->machine->return_used_this_function = 0;
15592 else /* TARGET_32BIT */
15594 /* We need to take into account any stack-frame rounding. */
15595 offsets = arm_get_frame_offsets ();
15597 gcc_assert (!use_return_insn (FALSE, NULL)
15598 || (cfun->machine->return_used_this_function != 0)
15599 || offsets->saved_regs == offsets->outgoing_args
15600 || frame_pointer_needed);
15602 /* Reset the ARM-specific per-function variables. */
15603 after_arm_reorg = 0;
15607 /* Generate and emit an insn that we will recognize as a push_multi.
15608 Unfortunately, since this insn does not reflect very well the actual
15609 semantics of the operation, we need to annotate the insn for the benefit
15610 of DWARF2 frame unwind information. */
15612 emit_multi_reg_push (unsigned long mask)
15615 int num_dwarf_regs;
15619 int dwarf_par_index;
15622 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15623 if (mask & (1 << i))
15626 gcc_assert (num_regs && num_regs <= 16);
15628 /* We don't record the PC in the dwarf frame information. */
15629 num_dwarf_regs = num_regs;
15630 if (mask & (1 << PC_REGNUM))
15633 /* For the body of the insn we are going to generate an UNSPEC in
15634 parallel with several USEs. This allows the insn to be recognized
15635 by the push_multi pattern in the arm.md file.
15637 The body of the insn looks something like this:
15640 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15641 (const_int:SI <num>)))
15642 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15648 For the frame note however, we try to be more explicit and actually
15649 show each register being stored into the stack frame, plus a (single)
15650 decrement of the stack pointer. We do it this way in order to be
15651 friendly to the stack unwinding code, which only wants to see a single
15652 stack decrement per instruction. The RTL we generate for the note looks
15653 something like this:
15656 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15657 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15658 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15659 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15663 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15664 instead we'd have a parallel expression detailing all
15665 the stores to the various memory addresses so that debug
15666 information is more up-to-date. Remember however while writing
15667 this to take care of the constraints with the push instruction.
15669 Note also that this has to be taken care of for the VFP registers.
15671 For more see PR43399. */
15673 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15674 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15675 dwarf_par_index = 1;
15677 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15679 if (mask & (1 << i))
15681 reg = gen_rtx_REG (SImode, i);
15683 XVECEXP (par, 0, 0)
15684 = gen_rtx_SET (VOIDmode,
15687 gen_rtx_PRE_MODIFY (Pmode,
15690 (stack_pointer_rtx,
15693 gen_rtx_UNSPEC (BLKmode,
15694 gen_rtvec (1, reg),
15695 UNSPEC_PUSH_MULT));
15697 if (i != PC_REGNUM)
15699 tmp = gen_rtx_SET (VOIDmode,
15700 gen_frame_mem (SImode, stack_pointer_rtx),
15702 RTX_FRAME_RELATED_P (tmp) = 1;
15703 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15711 for (j = 1, i++; j < num_regs; i++)
15713 if (mask & (1 << i))
15715 reg = gen_rtx_REG (SImode, i);
15717 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15719 if (i != PC_REGNUM)
15722 = gen_rtx_SET (VOIDmode,
15725 plus_constant (stack_pointer_rtx,
15728 RTX_FRAME_RELATED_P (tmp) = 1;
15729 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15736 par = emit_insn (par);
15738 tmp = gen_rtx_SET (VOIDmode,
15740 plus_constant (stack_pointer_rtx, -4 * num_regs));
15741 RTX_FRAME_RELATED_P (tmp) = 1;
15742 XVECEXP (dwarf, 0, 0) = tmp;
15744 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15749 /* Calculate the size of the return value that is passed in registers. */
15751 arm_size_return_regs (void)
15753 enum machine_mode mode;
15755 if (crtl->return_rtx != 0)
15756 mode = GET_MODE (crtl->return_rtx);
15758 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15760 return GET_MODE_SIZE (mode);
15764 emit_sfm (int base_reg, int count)
15771 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15772 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15774 reg = gen_rtx_REG (XFmode, base_reg++);
15776 XVECEXP (par, 0, 0)
15777 = gen_rtx_SET (VOIDmode,
15780 gen_rtx_PRE_MODIFY (Pmode,
15783 (stack_pointer_rtx,
15786 gen_rtx_UNSPEC (BLKmode,
15787 gen_rtvec (1, reg),
15788 UNSPEC_PUSH_MULT));
15789 tmp = gen_rtx_SET (VOIDmode,
15790 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15791 RTX_FRAME_RELATED_P (tmp) = 1;
15792 XVECEXP (dwarf, 0, 1) = tmp;
15794 for (i = 1; i < count; i++)
15796 reg = gen_rtx_REG (XFmode, base_reg++);
15797 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15799 tmp = gen_rtx_SET (VOIDmode,
15800 gen_frame_mem (XFmode,
15801 plus_constant (stack_pointer_rtx,
15804 RTX_FRAME_RELATED_P (tmp) = 1;
15805 XVECEXP (dwarf, 0, i + 1) = tmp;
15808 tmp = gen_rtx_SET (VOIDmode,
15810 plus_constant (stack_pointer_rtx, -12 * count));
15812 RTX_FRAME_RELATED_P (tmp) = 1;
15813 XVECEXP (dwarf, 0, 0) = tmp;
15815 par = emit_insn (par);
15816 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15822 /* Return true if the current function needs to save/restore LR. */
15825 thumb_force_lr_save (void)
15827 return !cfun->machine->lr_save_eliminated
15828 && (!leaf_function_p ()
15829 || thumb_far_jump_used_p ()
15830 || df_regs_ever_live_p (LR_REGNUM));
15834 /* Return true if r3 is used by any of the tail call insns in the
15835 current function. */
15838 any_sibcall_uses_r3 (void)
15843 if (!crtl->tail_call_emit)
15845 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15846 if (e->flags & EDGE_SIBCALL)
15848 rtx call = BB_END (e->src);
15849 if (!CALL_P (call))
15850 call = prev_nonnote_nondebug_insn (call);
15851 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15852 if (find_regno_fusage (call, USE, 3))
15859 /* Compute the distance from register FROM to register TO.
15860 These can be the arg pointer (26), the soft frame pointer (25),
15861 the stack pointer (13) or the hard frame pointer (11).
15862 In thumb mode r7 is used as the soft frame pointer, if needed.
15863 Typical stack layout looks like this:
15865 old stack pointer -> | |
15868 | | saved arguments for
15869 | | vararg functions
15872 hard FP & arg pointer -> | | \
15880 soft frame pointer -> | | /
15885 locals base pointer -> | | /
15890 current stack pointer -> | | /
15893 For a given function some or all of these stack components
15894 may not be needed, giving rise to the possibility of
15895 eliminating some of the registers.
15897 The values returned by this function must reflect the behavior
15898 of arm_expand_prologue() and arm_compute_save_reg_mask().
15900 The sign of the number returned reflects the direction of stack
15901 growth, so the values are positive for all eliminations except
15902 from the soft frame pointer to the hard frame pointer.
15904 SFP may point just inside the local variables block to ensure correct
15908 /* Calculate stack offsets. These are used to calculate register elimination
15909 offsets and in prologue/epilogue code. Also calculates which registers
15910 should be saved. */
15912 static arm_stack_offsets *
15913 arm_get_frame_offsets (void)
15915 struct arm_stack_offsets *offsets;
15916 unsigned long func_type;
15920 HOST_WIDE_INT frame_size;
15923 offsets = &cfun->machine->stack_offsets;
15925 /* We need to know if we are a leaf function. Unfortunately, it
15926 is possible to be called after start_sequence has been called,
15927 which causes get_insns to return the insns for the sequence,
15928 not the function, which will cause leaf_function_p to return
15929 the incorrect result.
15931 to know about leaf functions once reload has completed, and the
15932 frame size cannot be changed after that time, so we can safely
15933 use the cached value. */
15935 if (reload_completed)
15938 /* Initially this is the size of the local variables. It will translated
15939 into an offset once we have determined the size of preceding data. */
15940 frame_size = ROUND_UP_WORD (get_frame_size ());
15942 leaf = leaf_function_p ();
15944 /* Space for variadic functions. */
15945 offsets->saved_args = crtl->args.pretend_args_size;
15947 /* In Thumb mode this is incorrect, but never used. */
15948 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15949 arm_compute_static_chain_stack_bytes();
15953 unsigned int regno;
15955 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15956 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15957 saved = core_saved;
15959 /* We know that SP will be doubleword aligned on entry, and we must
15960 preserve that condition at any subroutine call. We also require the
15961 soft frame pointer to be doubleword aligned. */
15963 if (TARGET_REALLY_IWMMXT)
15965 /* Check for the call-saved iWMMXt registers. */
15966 for (regno = FIRST_IWMMXT_REGNUM;
15967 regno <= LAST_IWMMXT_REGNUM;
15969 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15973 func_type = arm_current_func_type ();
15974 if (! IS_VOLATILE (func_type))
15976 /* Space for saved FPA registers. */
15977 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15978 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15981 /* Space for saved VFP registers. */
15982 if (TARGET_HARD_FLOAT && TARGET_VFP)
15983 saved += arm_get_vfp_saved_size ();
15986 else /* TARGET_THUMB1 */
15988 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15989 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15990 saved = core_saved;
15991 if (TARGET_BACKTRACE)
15995 /* Saved registers include the stack frame. */
15996 offsets->saved_regs = offsets->saved_args + saved +
15997 arm_compute_static_chain_stack_bytes();
15998 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15999 /* A leaf function does not need any stack alignment if it has nothing
16001 if (leaf && frame_size == 0
16002 /* However if it calls alloca(), we have a dynamically allocated
16003 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16004 && ! cfun->calls_alloca)
16006 offsets->outgoing_args = offsets->soft_frame;
16007 offsets->locals_base = offsets->soft_frame;
16011 /* Ensure SFP has the correct alignment. */
16012 if (ARM_DOUBLEWORD_ALIGN
16013 && (offsets->soft_frame & 7))
16015 offsets->soft_frame += 4;
16016 /* Try to align stack by pushing an extra reg. Don't bother doing this
16017 when there is a stack frame as the alignment will be rolled into
16018 the normal stack adjustment. */
16019 if (frame_size + crtl->outgoing_args_size == 0)
16023 /* If it is safe to use r3, then do so. This sometimes
16024 generates better code on Thumb-2 by avoiding the need to
16025 use 32-bit push/pop instructions. */
16026 if (! any_sibcall_uses_r3 ()
16027 && arm_size_return_regs () <= 12
16028 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16033 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16035 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16044 offsets->saved_regs += 4;
16045 offsets->saved_regs_mask |= (1 << reg);
16050 offsets->locals_base = offsets->soft_frame + frame_size;
16051 offsets->outgoing_args = (offsets->locals_base
16052 + crtl->outgoing_args_size);
16054 if (ARM_DOUBLEWORD_ALIGN)
16056 /* Ensure SP remains doubleword aligned. */
16057 if (offsets->outgoing_args & 7)
16058 offsets->outgoing_args += 4;
16059 gcc_assert (!(offsets->outgoing_args & 7));
16066 /* Calculate the relative offsets for the different stack pointers. Positive
16067 offsets are in the direction of stack growth. */
16070 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16072 arm_stack_offsets *offsets;
16074 offsets = arm_get_frame_offsets ();
16076 /* OK, now we have enough information to compute the distances.
16077 There must be an entry in these switch tables for each pair
16078 of registers in ELIMINABLE_REGS, even if some of the entries
16079 seem to be redundant or useless. */
16082 case ARG_POINTER_REGNUM:
16085 case THUMB_HARD_FRAME_POINTER_REGNUM:
16088 case FRAME_POINTER_REGNUM:
16089 /* This is the reverse of the soft frame pointer
16090 to hard frame pointer elimination below. */
16091 return offsets->soft_frame - offsets->saved_args;
16093 case ARM_HARD_FRAME_POINTER_REGNUM:
16094 /* This is only non-zero in the case where the static chain register
16095 is stored above the frame. */
16096 return offsets->frame - offsets->saved_args - 4;
16098 case STACK_POINTER_REGNUM:
16099 /* If nothing has been pushed on the stack at all
16100 then this will return -4. This *is* correct! */
16101 return offsets->outgoing_args - (offsets->saved_args + 4);
16104 gcc_unreachable ();
16106 gcc_unreachable ();
16108 case FRAME_POINTER_REGNUM:
16111 case THUMB_HARD_FRAME_POINTER_REGNUM:
16114 case ARM_HARD_FRAME_POINTER_REGNUM:
16115 /* The hard frame pointer points to the top entry in the
16116 stack frame. The soft frame pointer to the bottom entry
16117 in the stack frame. If there is no stack frame at all,
16118 then they are identical. */
16120 return offsets->frame - offsets->soft_frame;
16122 case STACK_POINTER_REGNUM:
16123 return offsets->outgoing_args - offsets->soft_frame;
16126 gcc_unreachable ();
16128 gcc_unreachable ();
16131 /* You cannot eliminate from the stack pointer.
16132 In theory you could eliminate from the hard frame
16133 pointer to the stack pointer, but this will never
16134 happen, since if a stack frame is not needed the
16135 hard frame pointer will never be used. */
16136 gcc_unreachable ();
16140 /* Given FROM and TO register numbers, say whether this elimination is
16141 allowed. Frame pointer elimination is automatically handled.
16143 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16144 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16145 pointer, we must eliminate FRAME_POINTER_REGNUM into
16146 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16147 ARG_POINTER_REGNUM. */
16150 arm_can_eliminate (const int from, const int to)
16152 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16153 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16154 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16155 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16159 /* Emit RTL to save coprocessor registers on function entry. Returns the
16160 number of bytes pushed. */
16163 arm_save_coproc_regs(void)
16165 int saved_size = 0;
16167 unsigned start_reg;
16170 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16171 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16173 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16174 insn = gen_rtx_MEM (V2SImode, insn);
16175 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16176 RTX_FRAME_RELATED_P (insn) = 1;
16180 /* Save any floating point call-saved registers used by this
16182 if (TARGET_FPA_EMU2)
16184 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16185 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16187 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16188 insn = gen_rtx_MEM (XFmode, insn);
16189 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16190 RTX_FRAME_RELATED_P (insn) = 1;
16196 start_reg = LAST_FPA_REGNUM;
16198 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16200 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16202 if (start_reg - reg == 3)
16204 insn = emit_sfm (reg, 4);
16205 RTX_FRAME_RELATED_P (insn) = 1;
16207 start_reg = reg - 1;
16212 if (start_reg != reg)
16214 insn = emit_sfm (reg + 1, start_reg - reg);
16215 RTX_FRAME_RELATED_P (insn) = 1;
16216 saved_size += (start_reg - reg) * 12;
16218 start_reg = reg - 1;
16222 if (start_reg != reg)
16224 insn = emit_sfm (reg + 1, start_reg - reg);
16225 saved_size += (start_reg - reg) * 12;
16226 RTX_FRAME_RELATED_P (insn) = 1;
16229 if (TARGET_HARD_FLOAT && TARGET_VFP)
16231 start_reg = FIRST_VFP_REGNUM;
16233 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16235 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16236 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16238 if (start_reg != reg)
16239 saved_size += vfp_emit_fstmd (start_reg,
16240 (reg - start_reg) / 2);
16241 start_reg = reg + 2;
16244 if (start_reg != reg)
16245 saved_size += vfp_emit_fstmd (start_reg,
16246 (reg - start_reg) / 2);
16252 /* Set the Thumb frame pointer from the stack pointer. */
16255 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16257 HOST_WIDE_INT amount;
16260 amount = offsets->outgoing_args - offsets->locals_base;
16262 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16263 stack_pointer_rtx, GEN_INT (amount)));
16266 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16267 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16268 expects the first two operands to be the same. */
16271 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16273 hard_frame_pointer_rtx));
16277 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16278 hard_frame_pointer_rtx,
16279 stack_pointer_rtx));
16281 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16282 plus_constant (stack_pointer_rtx, amount));
16283 RTX_FRAME_RELATED_P (dwarf) = 1;
16284 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16287 RTX_FRAME_RELATED_P (insn) = 1;
16290 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16293 arm_expand_prologue (void)
16298 unsigned long live_regs_mask;
16299 unsigned long func_type;
16301 int saved_pretend_args = 0;
16302 int saved_regs = 0;
16303 unsigned HOST_WIDE_INT args_to_push;
16304 arm_stack_offsets *offsets;
16306 func_type = arm_current_func_type ();
16308 /* Naked functions don't have prologues. */
16309 if (IS_NAKED (func_type))
16312 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16313 args_to_push = crtl->args.pretend_args_size;
16315 /* Compute which register we will have to save onto the stack. */
16316 offsets = arm_get_frame_offsets ();
16317 live_regs_mask = offsets->saved_regs_mask;
16319 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16321 if (IS_STACKALIGN (func_type))
16325 /* Handle a word-aligned stack pointer. We generate the following:
16330 <save and restore r0 in normal prologue/epilogue>
16334 The unwinder doesn't need to know about the stack realignment.
16335 Just tell it we saved SP in r0. */
16336 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16338 r0 = gen_rtx_REG (SImode, 0);
16339 r1 = gen_rtx_REG (SImode, 1);
16341 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16342 RTX_FRAME_RELATED_P (insn) = 1;
16343 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16345 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16347 /* ??? The CFA changes here, which may cause GDB to conclude that it
16348 has entered a different function. That said, the unwind info is
16349 correct, individually, before and after this instruction because
16350 we've described the save of SP, which will override the default
16351 handling of SP as restoring from the CFA. */
16352 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16355 /* For APCS frames, if IP register is clobbered
16356 when creating frame, save that register in a special
16358 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16360 if (IS_INTERRUPT (func_type))
16362 /* Interrupt functions must not corrupt any registers.
16363 Creating a frame pointer however, corrupts the IP
16364 register, so we must push it first. */
16365 emit_multi_reg_push (1 << IP_REGNUM);
16367 /* Do not set RTX_FRAME_RELATED_P on this insn.
16368 The dwarf stack unwinding code only wants to see one
16369 stack decrement per function, and this is not it. If
16370 this instruction is labeled as being part of the frame
16371 creation sequence then dwarf2out_frame_debug_expr will
16372 die when it encounters the assignment of IP to FP
16373 later on, since the use of SP here establishes SP as
16374 the CFA register and not IP.
16376 Anyway this instruction is not really part of the stack
16377 frame creation although it is part of the prologue. */
16379 else if (IS_NESTED (func_type))
16381 /* The Static chain register is the same as the IP register
16382 used as a scratch register during stack frame creation.
16383 To get around this need to find somewhere to store IP
16384 whilst the frame is being created. We try the following
16387 1. The last argument register.
16388 2. A slot on the stack above the frame. (This only
16389 works if the function is not a varargs function).
16390 3. Register r3, after pushing the argument registers
16393 Note - we only need to tell the dwarf2 backend about the SP
16394 adjustment in the second variant; the static chain register
16395 doesn't need to be unwound, as it doesn't contain a value
16396 inherited from the caller. */
16398 if (df_regs_ever_live_p (3) == false)
16399 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16400 else if (args_to_push == 0)
16404 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16407 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16408 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16411 /* Just tell the dwarf backend that we adjusted SP. */
16412 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16413 plus_constant (stack_pointer_rtx,
16415 RTX_FRAME_RELATED_P (insn) = 1;
16416 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16420 /* Store the args on the stack. */
16421 if (cfun->machine->uses_anonymous_args)
16422 insn = emit_multi_reg_push
16423 ((0xf0 >> (args_to_push / 4)) & 0xf);
16426 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16427 GEN_INT (- args_to_push)));
16429 RTX_FRAME_RELATED_P (insn) = 1;
16431 saved_pretend_args = 1;
16432 fp_offset = args_to_push;
16435 /* Now reuse r3 to preserve IP. */
16436 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16440 insn = emit_set_insn (ip_rtx,
16441 plus_constant (stack_pointer_rtx, fp_offset));
16442 RTX_FRAME_RELATED_P (insn) = 1;
16447 /* Push the argument registers, or reserve space for them. */
16448 if (cfun->machine->uses_anonymous_args)
16449 insn = emit_multi_reg_push
16450 ((0xf0 >> (args_to_push / 4)) & 0xf);
16453 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16454 GEN_INT (- args_to_push)));
16455 RTX_FRAME_RELATED_P (insn) = 1;
16458 /* If this is an interrupt service routine, and the link register
16459 is going to be pushed, and we're not generating extra
16460 push of IP (needed when frame is needed and frame layout if apcs),
16461 subtracting four from LR now will mean that the function return
16462 can be done with a single instruction. */
16463 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16464 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16465 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16468 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16470 emit_set_insn (lr, plus_constant (lr, -4));
16473 if (live_regs_mask)
16475 saved_regs += bit_count (live_regs_mask) * 4;
16476 if (optimize_size && !frame_pointer_needed
16477 && saved_regs == offsets->saved_regs - offsets->saved_args)
16479 /* If no coprocessor registers are being pushed and we don't have
16480 to worry about a frame pointer then push extra registers to
16481 create the stack frame. This is done is a way that does not
16482 alter the frame layout, so is independent of the epilogue. */
16486 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16488 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16489 if (frame && n * 4 >= frame)
16492 live_regs_mask |= (1 << n) - 1;
16493 saved_regs += frame;
16496 insn = emit_multi_reg_push (live_regs_mask);
16497 RTX_FRAME_RELATED_P (insn) = 1;
16500 if (! IS_VOLATILE (func_type))
16501 saved_regs += arm_save_coproc_regs ();
16503 if (frame_pointer_needed && TARGET_ARM)
16505 /* Create the new frame pointer. */
16506 if (TARGET_APCS_FRAME)
16508 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16509 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16510 RTX_FRAME_RELATED_P (insn) = 1;
16512 if (IS_NESTED (func_type))
16514 /* Recover the static chain register. */
16515 if (!df_regs_ever_live_p (3)
16516 || saved_pretend_args)
16517 insn = gen_rtx_REG (SImode, 3);
16518 else /* if (crtl->args.pretend_args_size == 0) */
16520 insn = plus_constant (hard_frame_pointer_rtx, 4);
16521 insn = gen_frame_mem (SImode, insn);
16523 emit_set_insn (ip_rtx, insn);
16524 /* Add a USE to stop propagate_one_insn() from barfing. */
16525 emit_insn (gen_prologue_use (ip_rtx));
16530 insn = GEN_INT (saved_regs - 4);
16531 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16532 stack_pointer_rtx, insn));
16533 RTX_FRAME_RELATED_P (insn) = 1;
16537 if (flag_stack_usage_info)
16538 current_function_static_stack_size
16539 = offsets->outgoing_args - offsets->saved_args;
16541 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16543 /* This add can produce multiple insns for a large constant, so we
16544 need to get tricky. */
16545 rtx last = get_last_insn ();
16547 amount = GEN_INT (offsets->saved_args + saved_regs
16548 - offsets->outgoing_args);
16550 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16554 last = last ? NEXT_INSN (last) : get_insns ();
16555 RTX_FRAME_RELATED_P (last) = 1;
16557 while (last != insn);
16559 /* If the frame pointer is needed, emit a special barrier that
16560 will prevent the scheduler from moving stores to the frame
16561 before the stack adjustment. */
16562 if (frame_pointer_needed)
16563 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16564 hard_frame_pointer_rtx));
16568 if (frame_pointer_needed && TARGET_THUMB2)
16569 thumb_set_frame_pointer (offsets);
16571 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16573 unsigned long mask;
16575 mask = live_regs_mask;
16576 mask &= THUMB2_WORK_REGS;
16577 if (!IS_NESTED (func_type))
16578 mask |= (1 << IP_REGNUM);
16579 arm_load_pic_register (mask);
16582 /* If we are profiling, make sure no instructions are scheduled before
16583 the call to mcount. Similarly if the user has requested no
16584 scheduling in the prolog. Similarly if we want non-call exceptions
16585 using the EABI unwinder, to prevent faulting instructions from being
16586 swapped with a stack adjustment. */
16587 if (crtl->profile || !TARGET_SCHED_PROLOG
16588 || (arm_except_unwind_info (&global_options) == UI_TARGET
16589 && cfun->can_throw_non_call_exceptions))
16590 emit_insn (gen_blockage ());
16592 /* If the link register is being kept alive, with the return address in it,
16593 then make sure that it does not get reused by the ce2 pass. */
16594 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16595 cfun->machine->lr_save_eliminated = 1;
16598 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16600 arm_print_condition (FILE *stream)
16602 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16604 /* Branch conversion is not implemented for Thumb-2. */
16607 output_operand_lossage ("predicated Thumb instruction");
16610 if (current_insn_predicate != NULL)
16612 output_operand_lossage
16613 ("predicated instruction in conditional sequence");
16617 fputs (arm_condition_codes[arm_current_cc], stream);
16619 else if (current_insn_predicate)
16621 enum arm_cond_code code;
16625 output_operand_lossage ("predicated Thumb instruction");
16629 code = get_arm_condition_code (current_insn_predicate);
16630 fputs (arm_condition_codes[code], stream);
16635 /* If CODE is 'd', then the X is a condition operand and the instruction
16636 should only be executed if the condition is true.
16637 if CODE is 'D', then the X is a condition operand and the instruction
16638 should only be executed if the condition is false: however, if the mode
16639 of the comparison is CCFPEmode, then always execute the instruction -- we
16640 do this because in these circumstances !GE does not necessarily imply LT;
16641 in these cases the instruction pattern will take care to make sure that
16642 an instruction containing %d will follow, thereby undoing the effects of
16643 doing this instruction unconditionally.
16644 If CODE is 'N' then X is a floating point operand that must be negated
16646 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16647 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16649 arm_print_operand (FILE *stream, rtx x, int code)
16654 fputs (ASM_COMMENT_START, stream);
16658 fputs (user_label_prefix, stream);
16662 fputs (REGISTER_PREFIX, stream);
16666 arm_print_condition (stream);
16670 /* Nothing in unified syntax, otherwise the current condition code. */
16671 if (!TARGET_UNIFIED_ASM)
16672 arm_print_condition (stream);
16676 /* The current condition code in unified syntax, otherwise nothing. */
16677 if (TARGET_UNIFIED_ASM)
16678 arm_print_condition (stream);
16682 /* The current condition code for a condition code setting instruction.
16683 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16684 if (TARGET_UNIFIED_ASM)
16686 fputc('s', stream);
16687 arm_print_condition (stream);
16691 arm_print_condition (stream);
16692 fputc('s', stream);
16697 /* If the instruction is conditionally executed then print
16698 the current condition code, otherwise print 's'. */
16699 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16700 if (current_insn_predicate)
16701 arm_print_condition (stream);
16703 fputc('s', stream);
16706 /* %# is a "break" sequence. It doesn't output anything, but is used to
16707 separate e.g. operand numbers from following text, if that text consists
16708 of further digits which we don't want to be part of the operand
16716 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16717 r = real_value_negate (&r);
16718 fprintf (stream, "%s", fp_const_from_val (&r));
16722 /* An integer or symbol address without a preceding # sign. */
16724 switch (GET_CODE (x))
16727 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16731 output_addr_const (stream, x);
16735 if (GET_CODE (XEXP (x, 0)) == PLUS
16736 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16738 output_addr_const (stream, x);
16741 /* Fall through. */
16744 output_operand_lossage ("Unsupported operand for code '%c'", code);
16749 if (GET_CODE (x) == CONST_INT)
16752 val = ARM_SIGN_EXTEND (~INTVAL (x));
16753 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16757 putc ('~', stream);
16758 output_addr_const (stream, x);
16763 /* The low 16 bits of an immediate constant. */
16764 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16768 fprintf (stream, "%s", arithmetic_instr (x, 1));
16771 /* Truncate Cirrus shift counts. */
16773 if (GET_CODE (x) == CONST_INT)
16775 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16778 arm_print_operand (stream, x, 0);
16782 fprintf (stream, "%s", arithmetic_instr (x, 0));
16790 if (!shift_operator (x, SImode))
16792 output_operand_lossage ("invalid shift operand");
16796 shift = shift_op (x, &val);
16800 fprintf (stream, ", %s ", shift);
16802 arm_print_operand (stream, XEXP (x, 1), 0);
16804 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16809 /* An explanation of the 'Q', 'R' and 'H' register operands:
16811 In a pair of registers containing a DI or DF value the 'Q'
16812 operand returns the register number of the register containing
16813 the least significant part of the value. The 'R' operand returns
16814 the register number of the register containing the most
16815 significant part of the value.
16817 The 'H' operand returns the higher of the two register numbers.
16818 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16819 same as the 'Q' operand, since the most significant part of the
16820 value is held in the lower number register. The reverse is true
16821 on systems where WORDS_BIG_ENDIAN is false.
16823 The purpose of these operands is to distinguish between cases
16824 where the endian-ness of the values is important (for example
16825 when they are added together), and cases where the endian-ness
16826 is irrelevant, but the order of register operations is important.
16827 For example when loading a value from memory into a register
16828 pair, the endian-ness does not matter. Provided that the value
16829 from the lower memory address is put into the lower numbered
16830 register, and the value from the higher address is put into the
16831 higher numbered register, the load will work regardless of whether
16832 the value being loaded is big-wordian or little-wordian. The
16833 order of the two register loads can matter however, if the address
16834 of the memory location is actually held in one of the registers
16835 being overwritten by the load.
16837 The 'Q' and 'R' constraints are also available for 64-bit
16840 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16842 rtx part = gen_lowpart (SImode, x);
16843 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16847 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16849 output_operand_lossage ("invalid operand for code '%c'", code);
16853 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16857 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16859 enum machine_mode mode = GET_MODE (x);
16862 if (mode == VOIDmode)
16864 part = gen_highpart_mode (SImode, mode, x);
16865 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16869 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16871 output_operand_lossage ("invalid operand for code '%c'", code);
16875 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16879 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16881 output_operand_lossage ("invalid operand for code '%c'", code);
16885 asm_fprintf (stream, "%r", REGNO (x) + 1);
16889 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16891 output_operand_lossage ("invalid operand for code '%c'", code);
16895 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16899 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16901 output_operand_lossage ("invalid operand for code '%c'", code);
16905 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16909 asm_fprintf (stream, "%r",
16910 GET_CODE (XEXP (x, 0)) == REG
16911 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16915 asm_fprintf (stream, "{%r-%r}",
16917 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16920 /* Like 'M', but writing doubleword vector registers, for use by Neon
16924 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16925 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16927 asm_fprintf (stream, "{d%d}", regno);
16929 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16934 /* CONST_TRUE_RTX means always -- that's the default. */
16935 if (x == const_true_rtx)
16938 if (!COMPARISON_P (x))
16940 output_operand_lossage ("invalid operand for code '%c'", code);
16944 fputs (arm_condition_codes[get_arm_condition_code (x)],
16949 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16950 want to do that. */
16951 if (x == const_true_rtx)
16953 output_operand_lossage ("instruction never executed");
16956 if (!COMPARISON_P (x))
16958 output_operand_lossage ("invalid operand for code '%c'", code);
16962 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16963 (get_arm_condition_code (x))],
16967 /* Cirrus registers can be accessed in a variety of ways:
16968 single floating point (f)
16969 double floating point (d)
16971 64bit integer (dx). */
16972 case 'W': /* Cirrus register in F mode. */
16973 case 'X': /* Cirrus register in D mode. */
16974 case 'Y': /* Cirrus register in FX mode. */
16975 case 'Z': /* Cirrus register in DX mode. */
16976 gcc_assert (GET_CODE (x) == REG
16977 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16979 fprintf (stream, "mv%s%s",
16981 : code == 'X' ? "d"
16982 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16986 /* Print cirrus register in the mode specified by the register's mode. */
16989 int mode = GET_MODE (x);
16991 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16993 output_operand_lossage ("invalid operand for code '%c'", code);
16997 fprintf (stream, "mv%s%s",
16998 mode == DFmode ? "d"
16999 : mode == SImode ? "fx"
17000 : mode == DImode ? "dx"
17001 : "f", reg_names[REGNO (x)] + 2);
17007 if (GET_CODE (x) != REG
17008 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17009 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17010 /* Bad value for wCG register number. */
17012 output_operand_lossage ("invalid operand for code '%c'", code);
17017 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17020 /* Print an iWMMXt control register name. */
17022 if (GET_CODE (x) != CONST_INT
17024 || INTVAL (x) >= 16)
17025 /* Bad value for wC register number. */
17027 output_operand_lossage ("invalid operand for code '%c'", code);
17033 static const char * wc_reg_names [16] =
17035 "wCID", "wCon", "wCSSF", "wCASF",
17036 "wC4", "wC5", "wC6", "wC7",
17037 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17038 "wC12", "wC13", "wC14", "wC15"
17041 fprintf (stream, wc_reg_names [INTVAL (x)]);
17045 /* Print the high single-precision register of a VFP double-precision
17049 int mode = GET_MODE (x);
17052 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17054 output_operand_lossage ("invalid operand for code '%c'", code);
17059 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17061 output_operand_lossage ("invalid operand for code '%c'", code);
17065 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17069 /* Print a VFP/Neon double precision or quad precision register name. */
17073 int mode = GET_MODE (x);
17074 int is_quad = (code == 'q');
17077 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17079 output_operand_lossage ("invalid operand for code '%c'", code);
17083 if (GET_CODE (x) != REG
17084 || !IS_VFP_REGNUM (REGNO (x)))
17086 output_operand_lossage ("invalid operand for code '%c'", code);
17091 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17092 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17094 output_operand_lossage ("invalid operand for code '%c'", code);
17098 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17099 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17103 /* These two codes print the low/high doubleword register of a Neon quad
17104 register, respectively. For pair-structure types, can also print
17105 low/high quadword registers. */
17109 int mode = GET_MODE (x);
17112 if ((GET_MODE_SIZE (mode) != 16
17113 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17115 output_operand_lossage ("invalid operand for code '%c'", code);
17120 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17122 output_operand_lossage ("invalid operand for code '%c'", code);
17126 if (GET_MODE_SIZE (mode) == 16)
17127 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17128 + (code == 'f' ? 1 : 0));
17130 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17131 + (code == 'f' ? 1 : 0));
17135 /* Print a VFPv3 floating-point constant, represented as an integer
17139 int index = vfp3_const_double_index (x);
17140 gcc_assert (index != -1);
17141 fprintf (stream, "%d", index);
17145 /* Print bits representing opcode features for Neon.
17147 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17148 and polynomials as unsigned.
17150 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17152 Bit 2 is 1 for rounding functions, 0 otherwise. */
17154 /* Identify the type as 's', 'u', 'p' or 'f'. */
17157 HOST_WIDE_INT bits = INTVAL (x);
17158 fputc ("uspf"[bits & 3], stream);
17162 /* Likewise, but signed and unsigned integers are both 'i'. */
17165 HOST_WIDE_INT bits = INTVAL (x);
17166 fputc ("iipf"[bits & 3], stream);
17170 /* As for 'T', but emit 'u' instead of 'p'. */
17173 HOST_WIDE_INT bits = INTVAL (x);
17174 fputc ("usuf"[bits & 3], stream);
17178 /* Bit 2: rounding (vs none). */
17181 HOST_WIDE_INT bits = INTVAL (x);
17182 fputs ((bits & 4) != 0 ? "r" : "", stream);
17186 /* Memory operand for vld1/vst1 instruction. */
17190 bool postinc = FALSE;
17191 unsigned align, memsize, align_bits;
17193 gcc_assert (GET_CODE (x) == MEM);
17194 addr = XEXP (x, 0);
17195 if (GET_CODE (addr) == POST_INC)
17198 addr = XEXP (addr, 0);
17200 asm_fprintf (stream, "[%r", REGNO (addr));
17202 /* We know the alignment of this access, so we can emit a hint in the
17203 instruction (for some alignments) as an aid to the memory subsystem
17205 align = MEM_ALIGN (x) >> 3;
17206 memsize = MEM_SIZE (x);
17208 /* Only certain alignment specifiers are supported by the hardware. */
17209 if (memsize == 16 && (align % 32) == 0)
17211 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
17213 else if ((align % 8) == 0)
17218 if (align_bits != 0)
17219 asm_fprintf (stream, ":%d", align_bits);
17221 asm_fprintf (stream, "]");
17224 fputs("!", stream);
17232 gcc_assert (GET_CODE (x) == MEM);
17233 addr = XEXP (x, 0);
17234 gcc_assert (GET_CODE (addr) == REG);
17235 asm_fprintf (stream, "[%r]", REGNO (addr));
17239 /* Translate an S register number into a D register number and element index. */
17242 int mode = GET_MODE (x);
17245 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17247 output_operand_lossage ("invalid operand for code '%c'", code);
17252 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17254 output_operand_lossage ("invalid operand for code '%c'", code);
17258 regno = regno - FIRST_VFP_REGNUM;
17259 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17263 /* Register specifier for vld1.16/vst1.16. Translate the S register
17264 number into a D register number and element index. */
17267 int mode = GET_MODE (x);
17270 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17272 output_operand_lossage ("invalid operand for code '%c'", code);
17277 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17279 output_operand_lossage ("invalid operand for code '%c'", code);
17283 regno = regno - FIRST_VFP_REGNUM;
17284 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17291 output_operand_lossage ("missing operand");
17295 switch (GET_CODE (x))
17298 asm_fprintf (stream, "%r", REGNO (x));
17302 output_memory_reference_mode = GET_MODE (x);
17303 output_address (XEXP (x, 0));
17310 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17311 sizeof (fpstr), 0, 1);
17312 fprintf (stream, "#%s", fpstr);
17315 fprintf (stream, "#%s", fp_immediate_constant (x));
17319 gcc_assert (GET_CODE (x) != NEG);
17320 fputc ('#', stream);
17321 if (GET_CODE (x) == HIGH)
17323 fputs (":lower16:", stream);
17327 output_addr_const (stream, x);
17333 /* Target hook for printing a memory address. */
17335 arm_print_operand_address (FILE *stream, rtx x)
17339 int is_minus = GET_CODE (x) == MINUS;
17341 if (GET_CODE (x) == REG)
17342 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17343 else if (GET_CODE (x) == PLUS || is_minus)
17345 rtx base = XEXP (x, 0);
17346 rtx index = XEXP (x, 1);
17347 HOST_WIDE_INT offset = 0;
17348 if (GET_CODE (base) != REG
17349 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17351 /* Ensure that BASE is a register. */
17352 /* (one of them must be). */
17353 /* Also ensure the SP is not used as in index register. */
17358 switch (GET_CODE (index))
17361 offset = INTVAL (index);
17364 asm_fprintf (stream, "[%r, #%wd]",
17365 REGNO (base), offset);
17369 asm_fprintf (stream, "[%r, %s%r]",
17370 REGNO (base), is_minus ? "-" : "",
17380 asm_fprintf (stream, "[%r, %s%r",
17381 REGNO (base), is_minus ? "-" : "",
17382 REGNO (XEXP (index, 0)));
17383 arm_print_operand (stream, index, 'S');
17384 fputs ("]", stream);
17389 gcc_unreachable ();
17392 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17393 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17395 extern enum machine_mode output_memory_reference_mode;
17397 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17399 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17400 asm_fprintf (stream, "[%r, #%s%d]!",
17401 REGNO (XEXP (x, 0)),
17402 GET_CODE (x) == PRE_DEC ? "-" : "",
17403 GET_MODE_SIZE (output_memory_reference_mode));
17405 asm_fprintf (stream, "[%r], #%s%d",
17406 REGNO (XEXP (x, 0)),
17407 GET_CODE (x) == POST_DEC ? "-" : "",
17408 GET_MODE_SIZE (output_memory_reference_mode));
17410 else if (GET_CODE (x) == PRE_MODIFY)
17412 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17413 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17414 asm_fprintf (stream, "#%wd]!",
17415 INTVAL (XEXP (XEXP (x, 1), 1)));
17417 asm_fprintf (stream, "%r]!",
17418 REGNO (XEXP (XEXP (x, 1), 1)));
17420 else if (GET_CODE (x) == POST_MODIFY)
17422 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17423 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17424 asm_fprintf (stream, "#%wd",
17425 INTVAL (XEXP (XEXP (x, 1), 1)));
17427 asm_fprintf (stream, "%r",
17428 REGNO (XEXP (XEXP (x, 1), 1)));
17430 else output_addr_const (stream, x);
17434 if (GET_CODE (x) == REG)
17435 asm_fprintf (stream, "[%r]", REGNO (x));
17436 else if (GET_CODE (x) == POST_INC)
17437 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17438 else if (GET_CODE (x) == PLUS)
17440 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17441 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17442 asm_fprintf (stream, "[%r, #%wd]",
17443 REGNO (XEXP (x, 0)),
17444 INTVAL (XEXP (x, 1)));
17446 asm_fprintf (stream, "[%r, %r]",
17447 REGNO (XEXP (x, 0)),
17448 REGNO (XEXP (x, 1)));
17451 output_addr_const (stream, x);
17455 /* Target hook for indicating whether a punctuation character for
17456 TARGET_PRINT_OPERAND is valid. */
17458 arm_print_operand_punct_valid_p (unsigned char code)
17460 return (code == '@' || code == '|' || code == '.'
17461 || code == '(' || code == ')' || code == '#'
17462 || (TARGET_32BIT && (code == '?'))
17463 || (TARGET_THUMB2 && (code == '!'))
17464 || (TARGET_THUMB && (code == '_')));
17467 /* Target hook for assembling integer objects. The ARM version needs to
17468 handle word-sized values specially. */
17470 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17472 enum machine_mode mode;
17474 if (size == UNITS_PER_WORD && aligned_p)
17476 fputs ("\t.word\t", asm_out_file);
17477 output_addr_const (asm_out_file, x);
17479 /* Mark symbols as position independent. We only do this in the
17480 .text segment, not in the .data segment. */
17481 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17482 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17484 /* See legitimize_pic_address for an explanation of the
17485 TARGET_VXWORKS_RTP check. */
17486 if (TARGET_VXWORKS_RTP
17487 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17488 fputs ("(GOT)", asm_out_file);
17490 fputs ("(GOTOFF)", asm_out_file);
17492 fputc ('\n', asm_out_file);
17496 mode = GET_MODE (x);
17498 if (arm_vector_mode_supported_p (mode))
17502 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17504 units = CONST_VECTOR_NUNITS (x);
17505 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17507 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17508 for (i = 0; i < units; i++)
17510 rtx elt = CONST_VECTOR_ELT (x, i);
17512 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17515 for (i = 0; i < units; i++)
17517 rtx elt = CONST_VECTOR_ELT (x, i);
17518 REAL_VALUE_TYPE rval;
17520 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17523 (rval, GET_MODE_INNER (mode),
17524 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17530 return default_assemble_integer (x, size, aligned_p);
17534 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17538 if (!TARGET_AAPCS_BASED)
17541 default_named_section_asm_out_constructor
17542 : default_named_section_asm_out_destructor) (symbol, priority);
17546 /* Put these in the .init_array section, using a special relocation. */
17547 if (priority != DEFAULT_INIT_PRIORITY)
17550 sprintf (buf, "%s.%.5u",
17551 is_ctor ? ".init_array" : ".fini_array",
17553 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17560 switch_to_section (s);
17561 assemble_align (POINTER_SIZE);
17562 fputs ("\t.word\t", asm_out_file);
17563 output_addr_const (asm_out_file, symbol);
17564 fputs ("(target1)\n", asm_out_file);
17567 /* Add a function to the list of static constructors. */
17570 arm_elf_asm_constructor (rtx symbol, int priority)
17572 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17575 /* Add a function to the list of static destructors. */
17578 arm_elf_asm_destructor (rtx symbol, int priority)
17580 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17583 /* A finite state machine takes care of noticing whether or not instructions
17584 can be conditionally executed, and thus decrease execution time and code
17585 size by deleting branch instructions. The fsm is controlled by
17586 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17588 /* The state of the fsm controlling condition codes are:
17589 0: normal, do nothing special
17590 1: make ASM_OUTPUT_OPCODE not output this instruction
17591 2: make ASM_OUTPUT_OPCODE not output this instruction
17592 3: make instructions conditional
17593 4: make instructions conditional
17595 State transitions (state->state by whom under condition):
17596 0 -> 1 final_prescan_insn if the `target' is a label
17597 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17598 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17599 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17600 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17601 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17602 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17603 (the target insn is arm_target_insn).
17605 If the jump clobbers the conditions then we use states 2 and 4.
17607 A similar thing can be done with conditional return insns.
17609 XXX In case the `target' is an unconditional branch, this conditionalising
17610 of the instructions always reduces code size, but not always execution
17611 time. But then, I want to reduce the code size to somewhere near what
17612 /bin/cc produces. */
17614 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17615 instructions. When a COND_EXEC instruction is seen the subsequent
17616 instructions are scanned so that multiple conditional instructions can be
17617 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17618 specify the length and true/false mask for the IT block. These will be
17619 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
17621 /* Returns the index of the ARM condition code string in
17622 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17623 COMPARISON should be an rtx like `(eq (...) (...))'. */
17625 maybe_get_arm_condition_code (rtx comparison)
17627 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17628 enum arm_cond_code code;
17629 enum rtx_code comp_code = GET_CODE (comparison);
17631 if (GET_MODE_CLASS (mode) != MODE_CC)
17632 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17633 XEXP (comparison, 1));
17637 case CC_DNEmode: code = ARM_NE; goto dominance;
17638 case CC_DEQmode: code = ARM_EQ; goto dominance;
17639 case CC_DGEmode: code = ARM_GE; goto dominance;
17640 case CC_DGTmode: code = ARM_GT; goto dominance;
17641 case CC_DLEmode: code = ARM_LE; goto dominance;
17642 case CC_DLTmode: code = ARM_LT; goto dominance;
17643 case CC_DGEUmode: code = ARM_CS; goto dominance;
17644 case CC_DGTUmode: code = ARM_HI; goto dominance;
17645 case CC_DLEUmode: code = ARM_LS; goto dominance;
17646 case CC_DLTUmode: code = ARM_CC;
17649 if (comp_code == EQ)
17650 return ARM_INVERSE_CONDITION_CODE (code);
17651 if (comp_code == NE)
17658 case NE: return ARM_NE;
17659 case EQ: return ARM_EQ;
17660 case GE: return ARM_PL;
17661 case LT: return ARM_MI;
17662 default: return ARM_NV;
17668 case NE: return ARM_NE;
17669 case EQ: return ARM_EQ;
17670 default: return ARM_NV;
17676 case NE: return ARM_MI;
17677 case EQ: return ARM_PL;
17678 default: return ARM_NV;
17683 /* These encodings assume that AC=1 in the FPA system control
17684 byte. This allows us to handle all cases except UNEQ and
17688 case GE: return ARM_GE;
17689 case GT: return ARM_GT;
17690 case LE: return ARM_LS;
17691 case LT: return ARM_MI;
17692 case NE: return ARM_NE;
17693 case EQ: return ARM_EQ;
17694 case ORDERED: return ARM_VC;
17695 case UNORDERED: return ARM_VS;
17696 case UNLT: return ARM_LT;
17697 case UNLE: return ARM_LE;
17698 case UNGT: return ARM_HI;
17699 case UNGE: return ARM_PL;
17700 /* UNEQ and LTGT do not have a representation. */
17701 case UNEQ: /* Fall through. */
17702 case LTGT: /* Fall through. */
17703 default: return ARM_NV;
17709 case NE: return ARM_NE;
17710 case EQ: return ARM_EQ;
17711 case GE: return ARM_LE;
17712 case GT: return ARM_LT;
17713 case LE: return ARM_GE;
17714 case LT: return ARM_GT;
17715 case GEU: return ARM_LS;
17716 case GTU: return ARM_CC;
17717 case LEU: return ARM_CS;
17718 case LTU: return ARM_HI;
17719 default: return ARM_NV;
17725 case LTU: return ARM_CS;
17726 case GEU: return ARM_CC;
17727 default: return ARM_NV;
17733 case NE: return ARM_NE;
17734 case EQ: return ARM_EQ;
17735 case GEU: return ARM_CS;
17736 case GTU: return ARM_HI;
17737 case LEU: return ARM_LS;
17738 case LTU: return ARM_CC;
17739 default: return ARM_NV;
17745 case GE: return ARM_GE;
17746 case LT: return ARM_LT;
17747 case GEU: return ARM_CS;
17748 case LTU: return ARM_CC;
17749 default: return ARM_NV;
17755 case NE: return ARM_NE;
17756 case EQ: return ARM_EQ;
17757 case GE: return ARM_GE;
17758 case GT: return ARM_GT;
17759 case LE: return ARM_LE;
17760 case LT: return ARM_LT;
17761 case GEU: return ARM_CS;
17762 case GTU: return ARM_HI;
17763 case LEU: return ARM_LS;
17764 case LTU: return ARM_CC;
17765 default: return ARM_NV;
17768 default: gcc_unreachable ();
17772 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17773 static enum arm_cond_code
17774 get_arm_condition_code (rtx comparison)
17776 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17777 gcc_assert (code != ARM_NV);
17781 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17784 thumb2_final_prescan_insn (rtx insn)
17786 rtx first_insn = insn;
17787 rtx body = PATTERN (insn);
17789 enum arm_cond_code code;
17793 /* Remove the previous insn from the count of insns to be output. */
17794 if (arm_condexec_count)
17795 arm_condexec_count--;
17797 /* Nothing to do if we are already inside a conditional block. */
17798 if (arm_condexec_count)
17801 if (GET_CODE (body) != COND_EXEC)
17804 /* Conditional jumps are implemented directly. */
17805 if (GET_CODE (insn) == JUMP_INSN)
17808 predicate = COND_EXEC_TEST (body);
17809 arm_current_cc = get_arm_condition_code (predicate);
17811 n = get_attr_ce_count (insn);
17812 arm_condexec_count = 1;
17813 arm_condexec_mask = (1 << n) - 1;
17814 arm_condexec_masklen = n;
17815 /* See if subsequent instructions can be combined into the same block. */
17818 insn = next_nonnote_insn (insn);
17820 /* Jumping into the middle of an IT block is illegal, so a label or
17821 barrier terminates the block. */
17822 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17825 body = PATTERN (insn);
17826 /* USE and CLOBBER aren't really insns, so just skip them. */
17827 if (GET_CODE (body) == USE
17828 || GET_CODE (body) == CLOBBER)
17831 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17832 if (GET_CODE (body) != COND_EXEC)
17834 /* Allow up to 4 conditionally executed instructions in a block. */
17835 n = get_attr_ce_count (insn);
17836 if (arm_condexec_masklen + n > 4)
17839 predicate = COND_EXEC_TEST (body);
17840 code = get_arm_condition_code (predicate);
17841 mask = (1 << n) - 1;
17842 if (arm_current_cc == code)
17843 arm_condexec_mask |= (mask << arm_condexec_masklen);
17844 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17847 arm_condexec_count++;
17848 arm_condexec_masklen += n;
17850 /* A jump must be the last instruction in a conditional block. */
17851 if (GET_CODE(insn) == JUMP_INSN)
17854 /* Restore recog_data (getting the attributes of other insns can
17855 destroy this array, but final.c assumes that it remains intact
17856 across this call). */
17857 extract_constrain_insn_cached (first_insn);
17861 arm_final_prescan_insn (rtx insn)
17863 /* BODY will hold the body of INSN. */
17864 rtx body = PATTERN (insn);
17866 /* This will be 1 if trying to repeat the trick, and things need to be
17867 reversed if it appears to fail. */
17870 /* If we start with a return insn, we only succeed if we find another one. */
17871 int seeking_return = 0;
17872 enum rtx_code return_code = UNKNOWN;
17874 /* START_INSN will hold the insn from where we start looking. This is the
17875 first insn after the following code_label if REVERSE is true. */
17876 rtx start_insn = insn;
17878 /* If in state 4, check if the target branch is reached, in order to
17879 change back to state 0. */
17880 if (arm_ccfsm_state == 4)
17882 if (insn == arm_target_insn)
17884 arm_target_insn = NULL;
17885 arm_ccfsm_state = 0;
17890 /* If in state 3, it is possible to repeat the trick, if this insn is an
17891 unconditional branch to a label, and immediately following this branch
17892 is the previous target label which is only used once, and the label this
17893 branch jumps to is not too far off. */
17894 if (arm_ccfsm_state == 3)
17896 if (simplejump_p (insn))
17898 start_insn = next_nonnote_insn (start_insn);
17899 if (GET_CODE (start_insn) == BARRIER)
17901 /* XXX Isn't this always a barrier? */
17902 start_insn = next_nonnote_insn (start_insn);
17904 if (GET_CODE (start_insn) == CODE_LABEL
17905 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17906 && LABEL_NUSES (start_insn) == 1)
17911 else if (ANY_RETURN_P (body))
17913 start_insn = next_nonnote_insn (start_insn);
17914 if (GET_CODE (start_insn) == BARRIER)
17915 start_insn = next_nonnote_insn (start_insn);
17916 if (GET_CODE (start_insn) == CODE_LABEL
17917 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17918 && LABEL_NUSES (start_insn) == 1)
17921 seeking_return = 1;
17922 return_code = GET_CODE (body);
17931 gcc_assert (!arm_ccfsm_state || reverse);
17932 if (GET_CODE (insn) != JUMP_INSN)
17935 /* This jump might be paralleled with a clobber of the condition codes
17936 the jump should always come first */
17937 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17938 body = XVECEXP (body, 0, 0);
17941 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17942 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17945 int fail = FALSE, succeed = FALSE;
17946 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17947 int then_not_else = TRUE;
17948 rtx this_insn = start_insn, label = 0;
17950 /* Register the insn jumped to. */
17953 if (!seeking_return)
17954 label = XEXP (SET_SRC (body), 0);
17956 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17957 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17958 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17960 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17961 then_not_else = FALSE;
17963 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
17965 seeking_return = 1;
17966 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
17968 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
17970 seeking_return = 1;
17971 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
17972 then_not_else = FALSE;
17975 gcc_unreachable ();
17977 /* See how many insns this branch skips, and what kind of insns. If all
17978 insns are okay, and the label or unconditional branch to the same
17979 label is not too far away, succeed. */
17980 for (insns_skipped = 0;
17981 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17985 this_insn = next_nonnote_insn (this_insn);
17989 switch (GET_CODE (this_insn))
17992 /* Succeed if it is the target label, otherwise fail since
17993 control falls in from somewhere else. */
17994 if (this_insn == label)
17996 arm_ccfsm_state = 1;
18004 /* Succeed if the following insn is the target label.
18006 If return insns are used then the last insn in a function
18007 will be a barrier. */
18008 this_insn = next_nonnote_insn (this_insn);
18009 if (this_insn && this_insn == label)
18011 arm_ccfsm_state = 1;
18019 /* The AAPCS says that conditional calls should not be
18020 used since they make interworking inefficient (the
18021 linker can't transform BL<cond> into BLX). That's
18022 only a problem if the machine has BLX. */
18029 /* Succeed if the following insn is the target label, or
18030 if the following two insns are a barrier and the
18032 this_insn = next_nonnote_insn (this_insn);
18033 if (this_insn && GET_CODE (this_insn) == BARRIER)
18034 this_insn = next_nonnote_insn (this_insn);
18036 if (this_insn && this_insn == label
18037 && insns_skipped < max_insns_skipped)
18039 arm_ccfsm_state = 1;
18047 /* If this is an unconditional branch to the same label, succeed.
18048 If it is to another label, do nothing. If it is conditional,
18050 /* XXX Probably, the tests for SET and the PC are
18053 scanbody = PATTERN (this_insn);
18054 if (GET_CODE (scanbody) == SET
18055 && GET_CODE (SET_DEST (scanbody)) == PC)
18057 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18058 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18060 arm_ccfsm_state = 2;
18063 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18066 /* Fail if a conditional return is undesirable (e.g. on a
18067 StrongARM), but still allow this if optimizing for size. */
18068 else if (GET_CODE (scanbody) == return_code
18069 && !use_return_insn (TRUE, NULL)
18072 else if (GET_CODE (scanbody) == return_code)
18074 arm_ccfsm_state = 2;
18077 else if (GET_CODE (scanbody) == PARALLEL)
18079 switch (get_attr_conds (this_insn))
18089 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18094 /* Instructions using or affecting the condition codes make it
18096 scanbody = PATTERN (this_insn);
18097 if (!(GET_CODE (scanbody) == SET
18098 || GET_CODE (scanbody) == PARALLEL)
18099 || get_attr_conds (this_insn) != CONDS_NOCOND)
18102 /* A conditional cirrus instruction must be followed by
18103 a non Cirrus instruction. However, since we
18104 conditionalize instructions in this function and by
18105 the time we get here we can't add instructions
18106 (nops), because shorten_branches() has already been
18107 called, we will disable conditionalizing Cirrus
18108 instructions to be safe. */
18109 if (GET_CODE (scanbody) != USE
18110 && GET_CODE (scanbody) != CLOBBER
18111 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18121 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18122 arm_target_label = CODE_LABEL_NUMBER (label);
18125 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18127 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18129 this_insn = next_nonnote_insn (this_insn);
18130 gcc_assert (!this_insn
18131 || (GET_CODE (this_insn) != BARRIER
18132 && GET_CODE (this_insn) != CODE_LABEL));
18136 /* Oh, dear! we ran off the end.. give up. */
18137 extract_constrain_insn_cached (insn);
18138 arm_ccfsm_state = 0;
18139 arm_target_insn = NULL;
18142 arm_target_insn = this_insn;
18145 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18148 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18150 if (reverse || then_not_else)
18151 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18154 /* Restore recog_data (getting the attributes of other insns can
18155 destroy this array, but final.c assumes that it remains intact
18156 across this call. */
18157 extract_constrain_insn_cached (insn);
18161 /* Output IT instructions. */
18163 thumb2_asm_output_opcode (FILE * stream)
18168 if (arm_condexec_mask)
18170 for (n = 0; n < arm_condexec_masklen; n++)
18171 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18173 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18174 arm_condition_codes[arm_current_cc]);
18175 arm_condexec_mask = 0;
18179 /* Returns true if REGNO is a valid register
18180 for holding a quantity of type MODE. */
18182 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18184 if (GET_MODE_CLASS (mode) == MODE_CC)
18185 return (regno == CC_REGNUM
18186 || (TARGET_HARD_FLOAT && TARGET_VFP
18187 && regno == VFPCC_REGNUM));
18190 /* For the Thumb we only allow values bigger than SImode in
18191 registers 0 - 6, so that there is always a second low
18192 register available to hold the upper part of the value.
18193 We probably we ought to ensure that the register is the
18194 start of an even numbered register pair. */
18195 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18197 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18198 && IS_CIRRUS_REGNUM (regno))
18199 /* We have outlawed SI values in Cirrus registers because they
18200 reside in the lower 32 bits, but SF values reside in the
18201 upper 32 bits. This causes gcc all sorts of grief. We can't
18202 even split the registers into pairs because Cirrus SI values
18203 get sign extended to 64bits-- aldyh. */
18204 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18206 if (TARGET_HARD_FLOAT && TARGET_VFP
18207 && IS_VFP_REGNUM (regno))
18209 if (mode == SFmode || mode == SImode)
18210 return VFP_REGNO_OK_FOR_SINGLE (regno);
18212 if (mode == DFmode)
18213 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18215 /* VFP registers can hold HFmode values, but there is no point in
18216 putting them there unless we have hardware conversion insns. */
18217 if (mode == HFmode)
18218 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18221 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18222 || (VALID_NEON_QREG_MODE (mode)
18223 && NEON_REGNO_OK_FOR_QUAD (regno))
18224 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18225 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18226 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18227 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18228 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18233 if (TARGET_REALLY_IWMMXT)
18235 if (IS_IWMMXT_GR_REGNUM (regno))
18236 return mode == SImode;
18238 if (IS_IWMMXT_REGNUM (regno))
18239 return VALID_IWMMXT_REG_MODE (mode);
18242 /* We allow almost any value to be stored in the general registers.
18243 Restrict doubleword quantities to even register pairs so that we can
18244 use ldrd. Do not allow very large Neon structure opaque modes in
18245 general registers; they would use too many. */
18246 if (regno <= LAST_ARM_REGNUM)
18247 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18248 && ARM_NUM_REGS (mode) <= 4;
18250 if (regno == FRAME_POINTER_REGNUM
18251 || regno == ARG_POINTER_REGNUM)
18252 /* We only allow integers in the fake hard registers. */
18253 return GET_MODE_CLASS (mode) == MODE_INT;
18255 /* The only registers left are the FPA registers
18256 which we only allow to hold FP values. */
18257 return (TARGET_HARD_FLOAT && TARGET_FPA
18258 && GET_MODE_CLASS (mode) == MODE_FLOAT
18259 && regno >= FIRST_FPA_REGNUM
18260 && regno <= LAST_FPA_REGNUM);
18263 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18264 not used in arm mode. */
18267 arm_regno_class (int regno)
18271 if (regno == STACK_POINTER_REGNUM)
18273 if (regno == CC_REGNUM)
18280 if (TARGET_THUMB2 && regno < 8)
18283 if ( regno <= LAST_ARM_REGNUM
18284 || regno == FRAME_POINTER_REGNUM
18285 || regno == ARG_POINTER_REGNUM)
18286 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18288 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18289 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18291 if (IS_CIRRUS_REGNUM (regno))
18292 return CIRRUS_REGS;
18294 if (IS_VFP_REGNUM (regno))
18296 if (regno <= D7_VFP_REGNUM)
18297 return VFP_D0_D7_REGS;
18298 else if (regno <= LAST_LO_VFP_REGNUM)
18299 return VFP_LO_REGS;
18301 return VFP_HI_REGS;
18304 if (IS_IWMMXT_REGNUM (regno))
18305 return IWMMXT_REGS;
18307 if (IS_IWMMXT_GR_REGNUM (regno))
18308 return IWMMXT_GR_REGS;
18313 /* Handle a special case when computing the offset
18314 of an argument from the frame pointer. */
18316 arm_debugger_arg_offset (int value, rtx addr)
18320 /* We are only interested if dbxout_parms() failed to compute the offset. */
18324 /* We can only cope with the case where the address is held in a register. */
18325 if (GET_CODE (addr) != REG)
18328 /* If we are using the frame pointer to point at the argument, then
18329 an offset of 0 is correct. */
18330 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18333 /* If we are using the stack pointer to point at the
18334 argument, then an offset of 0 is correct. */
18335 /* ??? Check this is consistent with thumb2 frame layout. */
18336 if ((TARGET_THUMB || !frame_pointer_needed)
18337 && REGNO (addr) == SP_REGNUM)
18340 /* Oh dear. The argument is pointed to by a register rather
18341 than being held in a register, or being stored at a known
18342 offset from the frame pointer. Since GDB only understands
18343 those two kinds of argument we must translate the address
18344 held in the register into an offset from the frame pointer.
18345 We do this by searching through the insns for the function
18346 looking to see where this register gets its value. If the
18347 register is initialized from the frame pointer plus an offset
18348 then we are in luck and we can continue, otherwise we give up.
18350 This code is exercised by producing debugging information
18351 for a function with arguments like this:
18353 double func (double a, double b, int c, double d) {return d;}
18355 Without this code the stab for parameter 'd' will be set to
18356 an offset of 0 from the frame pointer, rather than 8. */
18358 /* The if() statement says:
18360 If the insn is a normal instruction
18361 and if the insn is setting the value in a register
18362 and if the register being set is the register holding the address of the argument
18363 and if the address is computing by an addition
18364 that involves adding to a register
18365 which is the frame pointer
18370 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18372 if ( GET_CODE (insn) == INSN
18373 && GET_CODE (PATTERN (insn)) == SET
18374 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18375 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18376 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18377 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18378 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18381 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18390 warning (0, "unable to compute real location of stacked parameter");
18391 value = 8; /* XXX magic hack */
18411 T_MAX /* Size of enum. Keep last. */
18412 } neon_builtin_type_mode;
18414 #define TYPE_MODE_BIT(X) (1 << (X))
18416 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18417 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18418 | TYPE_MODE_BIT (T_DI))
18419 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18420 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18421 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18423 #define v8qi_UP T_V8QI
18424 #define v4hi_UP T_V4HI
18425 #define v2si_UP T_V2SI
18426 #define v2sf_UP T_V2SF
18428 #define v16qi_UP T_V16QI
18429 #define v8hi_UP T_V8HI
18430 #define v4si_UP T_V4SI
18431 #define v4sf_UP T_V4SF
18432 #define v2di_UP T_V2DI
18437 #define UP(X) X##_UP
18470 NEON_LOADSTRUCTLANE,
18472 NEON_STORESTRUCTLANE,
18481 const neon_itype itype;
18482 const neon_builtin_type_mode mode;
18483 const enum insn_code code;
18484 unsigned int fcode;
18485 } neon_builtin_datum;
18487 #define CF(N,X) CODE_FOR_neon_##N##X
18489 #define VAR1(T, N, A) \
18490 {#N, NEON_##T, UP (A), CF (N, A), 0}
18491 #define VAR2(T, N, A, B) \
18493 {#N, NEON_##T, UP (B), CF (N, B), 0}
18494 #define VAR3(T, N, A, B, C) \
18495 VAR2 (T, N, A, B), \
18496 {#N, NEON_##T, UP (C), CF (N, C), 0}
18497 #define VAR4(T, N, A, B, C, D) \
18498 VAR3 (T, N, A, B, C), \
18499 {#N, NEON_##T, UP (D), CF (N, D), 0}
18500 #define VAR5(T, N, A, B, C, D, E) \
18501 VAR4 (T, N, A, B, C, D), \
18502 {#N, NEON_##T, UP (E), CF (N, E), 0}
18503 #define VAR6(T, N, A, B, C, D, E, F) \
18504 VAR5 (T, N, A, B, C, D, E), \
18505 {#N, NEON_##T, UP (F), CF (N, F), 0}
18506 #define VAR7(T, N, A, B, C, D, E, F, G) \
18507 VAR6 (T, N, A, B, C, D, E, F), \
18508 {#N, NEON_##T, UP (G), CF (N, G), 0}
18509 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18510 VAR7 (T, N, A, B, C, D, E, F, G), \
18511 {#N, NEON_##T, UP (H), CF (N, H), 0}
18512 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18513 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18514 {#N, NEON_##T, UP (I), CF (N, I), 0}
18515 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18516 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18517 {#N, NEON_##T, UP (J), CF (N, J), 0}
18519 /* The mode entries in the following table correspond to the "key" type of the
18520 instruction variant, i.e. equivalent to that which would be specified after
18521 the assembler mnemonic, which usually refers to the last vector operand.
18522 (Signed/unsigned/polynomial types are not differentiated between though, and
18523 are all mapped onto the same mode for a given element size.) The modes
18524 listed per instruction should be the same as those defined for that
18525 instruction's pattern in neon.md. */
18527 static neon_builtin_datum neon_builtin_data[] =
18529 VAR10 (BINOP, vadd,
18530 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18531 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18532 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18533 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18534 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18535 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18536 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18537 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18538 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18539 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18540 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18541 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18542 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18543 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18544 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18545 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18546 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18547 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18548 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18549 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18550 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18551 VAR2 (BINOP, vqdmull, v4hi, v2si),
18552 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18553 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18554 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18555 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18556 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18557 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18558 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18559 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18560 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18561 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18562 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18563 VAR10 (BINOP, vsub,
18564 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18565 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18566 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18567 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18568 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18569 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18570 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18571 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18572 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18573 VAR2 (BINOP, vcage, v2sf, v4sf),
18574 VAR2 (BINOP, vcagt, v2sf, v4sf),
18575 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18576 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18577 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18578 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18579 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18580 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18581 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18582 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18583 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18584 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18585 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18586 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18587 VAR2 (BINOP, vrecps, v2sf, v4sf),
18588 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18589 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18590 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18591 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18592 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18593 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18594 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18595 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18596 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18597 VAR2 (UNOP, vcnt, v8qi, v16qi),
18598 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18599 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18600 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18601 /* FIXME: vget_lane supports more variants than this! */
18602 VAR10 (GETLANE, vget_lane,
18603 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18604 VAR10 (SETLANE, vset_lane,
18605 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18606 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18607 VAR10 (DUP, vdup_n,
18608 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18609 VAR10 (DUPLANE, vdup_lane,
18610 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18611 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18612 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18613 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18614 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18615 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18616 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18617 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18618 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18619 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18620 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18621 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18622 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18623 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18624 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18625 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18626 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18627 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18628 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18629 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18630 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18631 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18632 VAR10 (BINOP, vext,
18633 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18634 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18635 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18636 VAR2 (UNOP, vrev16, v8qi, v16qi),
18637 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18638 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18639 VAR10 (SELECT, vbsl,
18640 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18641 VAR1 (VTBL, vtbl1, v8qi),
18642 VAR1 (VTBL, vtbl2, v8qi),
18643 VAR1 (VTBL, vtbl3, v8qi),
18644 VAR1 (VTBL, vtbl4, v8qi),
18645 VAR1 (VTBX, vtbx1, v8qi),
18646 VAR1 (VTBX, vtbx2, v8qi),
18647 VAR1 (VTBX, vtbx3, v8qi),
18648 VAR1 (VTBX, vtbx4, v8qi),
18649 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18650 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18651 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18652 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18653 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18654 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18655 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18656 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18657 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18658 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18659 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18660 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18661 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18662 VAR10 (LOAD1, vld1,
18663 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18664 VAR10 (LOAD1LANE, vld1_lane,
18665 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18666 VAR10 (LOAD1, vld1_dup,
18667 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18668 VAR10 (STORE1, vst1,
18669 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18670 VAR10 (STORE1LANE, vst1_lane,
18671 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18673 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18674 VAR7 (LOADSTRUCTLANE, vld2_lane,
18675 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18676 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18677 VAR9 (STORESTRUCT, vst2,
18678 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18679 VAR7 (STORESTRUCTLANE, vst2_lane,
18680 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18682 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18683 VAR7 (LOADSTRUCTLANE, vld3_lane,
18684 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18685 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18686 VAR9 (STORESTRUCT, vst3,
18687 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18688 VAR7 (STORESTRUCTLANE, vst3_lane,
18689 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18690 VAR9 (LOADSTRUCT, vld4,
18691 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18692 VAR7 (LOADSTRUCTLANE, vld4_lane,
18693 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18694 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18695 VAR9 (STORESTRUCT, vst4,
18696 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18697 VAR7 (STORESTRUCTLANE, vst4_lane,
18698 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18699 VAR10 (LOGICBINOP, vand,
18700 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18701 VAR10 (LOGICBINOP, vorr,
18702 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18703 VAR10 (BINOP, veor,
18704 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18705 VAR10 (LOGICBINOP, vbic,
18706 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18707 VAR10 (LOGICBINOP, vorn,
18708 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18723 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18724 symbolic names defined here (which would require too much duplication).
18728 ARM_BUILTIN_GETWCX,
18729 ARM_BUILTIN_SETWCX,
18733 ARM_BUILTIN_WAVG2BR,
18734 ARM_BUILTIN_WAVG2HR,
18735 ARM_BUILTIN_WAVG2B,
18736 ARM_BUILTIN_WAVG2H,
18743 ARM_BUILTIN_WMACSZ,
18745 ARM_BUILTIN_WMACUZ,
18748 ARM_BUILTIN_WSADBZ,
18750 ARM_BUILTIN_WSADHZ,
18752 ARM_BUILTIN_WALIGN,
18755 ARM_BUILTIN_TMIAPH,
18756 ARM_BUILTIN_TMIABB,
18757 ARM_BUILTIN_TMIABT,
18758 ARM_BUILTIN_TMIATB,
18759 ARM_BUILTIN_TMIATT,
18761 ARM_BUILTIN_TMOVMSKB,
18762 ARM_BUILTIN_TMOVMSKH,
18763 ARM_BUILTIN_TMOVMSKW,
18765 ARM_BUILTIN_TBCSTB,
18766 ARM_BUILTIN_TBCSTH,
18767 ARM_BUILTIN_TBCSTW,
18769 ARM_BUILTIN_WMADDS,
18770 ARM_BUILTIN_WMADDU,
18772 ARM_BUILTIN_WPACKHSS,
18773 ARM_BUILTIN_WPACKWSS,
18774 ARM_BUILTIN_WPACKDSS,
18775 ARM_BUILTIN_WPACKHUS,
18776 ARM_BUILTIN_WPACKWUS,
18777 ARM_BUILTIN_WPACKDUS,
18782 ARM_BUILTIN_WADDSSB,
18783 ARM_BUILTIN_WADDSSH,
18784 ARM_BUILTIN_WADDSSW,
18785 ARM_BUILTIN_WADDUSB,
18786 ARM_BUILTIN_WADDUSH,
18787 ARM_BUILTIN_WADDUSW,
18791 ARM_BUILTIN_WSUBSSB,
18792 ARM_BUILTIN_WSUBSSH,
18793 ARM_BUILTIN_WSUBSSW,
18794 ARM_BUILTIN_WSUBUSB,
18795 ARM_BUILTIN_WSUBUSH,
18796 ARM_BUILTIN_WSUBUSW,
18803 ARM_BUILTIN_WCMPEQB,
18804 ARM_BUILTIN_WCMPEQH,
18805 ARM_BUILTIN_WCMPEQW,
18806 ARM_BUILTIN_WCMPGTUB,
18807 ARM_BUILTIN_WCMPGTUH,
18808 ARM_BUILTIN_WCMPGTUW,
18809 ARM_BUILTIN_WCMPGTSB,
18810 ARM_BUILTIN_WCMPGTSH,
18811 ARM_BUILTIN_WCMPGTSW,
18813 ARM_BUILTIN_TEXTRMSB,
18814 ARM_BUILTIN_TEXTRMSH,
18815 ARM_BUILTIN_TEXTRMSW,
18816 ARM_BUILTIN_TEXTRMUB,
18817 ARM_BUILTIN_TEXTRMUH,
18818 ARM_BUILTIN_TEXTRMUW,
18819 ARM_BUILTIN_TINSRB,
18820 ARM_BUILTIN_TINSRH,
18821 ARM_BUILTIN_TINSRW,
18823 ARM_BUILTIN_WMAXSW,
18824 ARM_BUILTIN_WMAXSH,
18825 ARM_BUILTIN_WMAXSB,
18826 ARM_BUILTIN_WMAXUW,
18827 ARM_BUILTIN_WMAXUH,
18828 ARM_BUILTIN_WMAXUB,
18829 ARM_BUILTIN_WMINSW,
18830 ARM_BUILTIN_WMINSH,
18831 ARM_BUILTIN_WMINSB,
18832 ARM_BUILTIN_WMINUW,
18833 ARM_BUILTIN_WMINUH,
18834 ARM_BUILTIN_WMINUB,
18836 ARM_BUILTIN_WMULUM,
18837 ARM_BUILTIN_WMULSM,
18838 ARM_BUILTIN_WMULUL,
18840 ARM_BUILTIN_PSADBH,
18841 ARM_BUILTIN_WSHUFH,
18855 ARM_BUILTIN_WSLLHI,
18856 ARM_BUILTIN_WSLLWI,
18857 ARM_BUILTIN_WSLLDI,
18858 ARM_BUILTIN_WSRAHI,
18859 ARM_BUILTIN_WSRAWI,
18860 ARM_BUILTIN_WSRADI,
18861 ARM_BUILTIN_WSRLHI,
18862 ARM_BUILTIN_WSRLWI,
18863 ARM_BUILTIN_WSRLDI,
18864 ARM_BUILTIN_WRORHI,
18865 ARM_BUILTIN_WRORWI,
18866 ARM_BUILTIN_WRORDI,
18868 ARM_BUILTIN_WUNPCKIHB,
18869 ARM_BUILTIN_WUNPCKIHH,
18870 ARM_BUILTIN_WUNPCKIHW,
18871 ARM_BUILTIN_WUNPCKILB,
18872 ARM_BUILTIN_WUNPCKILH,
18873 ARM_BUILTIN_WUNPCKILW,
18875 ARM_BUILTIN_WUNPCKEHSB,
18876 ARM_BUILTIN_WUNPCKEHSH,
18877 ARM_BUILTIN_WUNPCKEHSW,
18878 ARM_BUILTIN_WUNPCKEHUB,
18879 ARM_BUILTIN_WUNPCKEHUH,
18880 ARM_BUILTIN_WUNPCKEHUW,
18881 ARM_BUILTIN_WUNPCKELSB,
18882 ARM_BUILTIN_WUNPCKELSH,
18883 ARM_BUILTIN_WUNPCKELSW,
18884 ARM_BUILTIN_WUNPCKELUB,
18885 ARM_BUILTIN_WUNPCKELUH,
18886 ARM_BUILTIN_WUNPCKELUW,
18888 ARM_BUILTIN_THREAD_POINTER,
18890 ARM_BUILTIN_NEON_BASE,
18892 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18895 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18898 arm_init_neon_builtins (void)
18900 unsigned int i, fcode;
18903 tree neon_intQI_type_node;
18904 tree neon_intHI_type_node;
18905 tree neon_polyQI_type_node;
18906 tree neon_polyHI_type_node;
18907 tree neon_intSI_type_node;
18908 tree neon_intDI_type_node;
18909 tree neon_float_type_node;
18911 tree intQI_pointer_node;
18912 tree intHI_pointer_node;
18913 tree intSI_pointer_node;
18914 tree intDI_pointer_node;
18915 tree float_pointer_node;
18917 tree const_intQI_node;
18918 tree const_intHI_node;
18919 tree const_intSI_node;
18920 tree const_intDI_node;
18921 tree const_float_node;
18923 tree const_intQI_pointer_node;
18924 tree const_intHI_pointer_node;
18925 tree const_intSI_pointer_node;
18926 tree const_intDI_pointer_node;
18927 tree const_float_pointer_node;
18929 tree V8QI_type_node;
18930 tree V4HI_type_node;
18931 tree V2SI_type_node;
18932 tree V2SF_type_node;
18933 tree V16QI_type_node;
18934 tree V8HI_type_node;
18935 tree V4SI_type_node;
18936 tree V4SF_type_node;
18937 tree V2DI_type_node;
18939 tree intUQI_type_node;
18940 tree intUHI_type_node;
18941 tree intUSI_type_node;
18942 tree intUDI_type_node;
18944 tree intEI_type_node;
18945 tree intOI_type_node;
18946 tree intCI_type_node;
18947 tree intXI_type_node;
18949 tree V8QI_pointer_node;
18950 tree V4HI_pointer_node;
18951 tree V2SI_pointer_node;
18952 tree V2SF_pointer_node;
18953 tree V16QI_pointer_node;
18954 tree V8HI_pointer_node;
18955 tree V4SI_pointer_node;
18956 tree V4SF_pointer_node;
18957 tree V2DI_pointer_node;
18959 tree void_ftype_pv8qi_v8qi_v8qi;
18960 tree void_ftype_pv4hi_v4hi_v4hi;
18961 tree void_ftype_pv2si_v2si_v2si;
18962 tree void_ftype_pv2sf_v2sf_v2sf;
18963 tree void_ftype_pdi_di_di;
18964 tree void_ftype_pv16qi_v16qi_v16qi;
18965 tree void_ftype_pv8hi_v8hi_v8hi;
18966 tree void_ftype_pv4si_v4si_v4si;
18967 tree void_ftype_pv4sf_v4sf_v4sf;
18968 tree void_ftype_pv2di_v2di_v2di;
18970 tree reinterp_ftype_dreg[5][5];
18971 tree reinterp_ftype_qreg[5][5];
18972 tree dreg_types[5], qreg_types[5];
18974 /* Create distinguished type nodes for NEON vector element types,
18975 and pointers to values of such types, so we can detect them later. */
18976 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18977 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18978 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18979 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18980 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18981 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18982 neon_float_type_node = make_node (REAL_TYPE);
18983 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18984 layout_type (neon_float_type_node);
18986 /* Define typedefs which exactly correspond to the modes we are basing vector
18987 types on. If you change these names you'll need to change
18988 the table used by arm_mangle_type too. */
18989 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18990 "__builtin_neon_qi");
18991 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18992 "__builtin_neon_hi");
18993 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18994 "__builtin_neon_si");
18995 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18996 "__builtin_neon_sf");
18997 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18998 "__builtin_neon_di");
18999 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19000 "__builtin_neon_poly8");
19001 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19002 "__builtin_neon_poly16");
19004 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19005 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19006 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19007 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19008 float_pointer_node = build_pointer_type (neon_float_type_node);
19010 /* Next create constant-qualified versions of the above types. */
19011 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19013 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19015 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19017 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19019 const_float_node = build_qualified_type (neon_float_type_node,
19022 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19023 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19024 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19025 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19026 const_float_pointer_node = build_pointer_type (const_float_node);
19028 /* Now create vector types based on our NEON element types. */
19029 /* 64-bit vectors. */
19031 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19033 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19035 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19037 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19038 /* 128-bit vectors. */
19040 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19042 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19044 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19046 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19048 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19050 /* Unsigned integer types for various mode sizes. */
19051 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19052 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19053 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19054 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19056 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19057 "__builtin_neon_uqi");
19058 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19059 "__builtin_neon_uhi");
19060 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19061 "__builtin_neon_usi");
19062 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19063 "__builtin_neon_udi");
19065 /* Opaque integer types for structures of vectors. */
19066 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19067 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19068 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19069 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19071 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19072 "__builtin_neon_ti");
19073 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19074 "__builtin_neon_ei");
19075 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19076 "__builtin_neon_oi");
19077 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19078 "__builtin_neon_ci");
19079 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19080 "__builtin_neon_xi");
19082 /* Pointers to vector types. */
19083 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19084 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19085 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19086 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19087 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19088 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19089 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19090 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19091 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19093 /* Operations which return results as pairs. */
19094 void_ftype_pv8qi_v8qi_v8qi =
19095 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19096 V8QI_type_node, NULL);
19097 void_ftype_pv4hi_v4hi_v4hi =
19098 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19099 V4HI_type_node, NULL);
19100 void_ftype_pv2si_v2si_v2si =
19101 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19102 V2SI_type_node, NULL);
19103 void_ftype_pv2sf_v2sf_v2sf =
19104 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19105 V2SF_type_node, NULL);
19106 void_ftype_pdi_di_di =
19107 build_function_type_list (void_type_node, intDI_pointer_node,
19108 neon_intDI_type_node, neon_intDI_type_node, NULL);
19109 void_ftype_pv16qi_v16qi_v16qi =
19110 build_function_type_list (void_type_node, V16QI_pointer_node,
19111 V16QI_type_node, V16QI_type_node, NULL);
19112 void_ftype_pv8hi_v8hi_v8hi =
19113 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19114 V8HI_type_node, NULL);
19115 void_ftype_pv4si_v4si_v4si =
19116 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19117 V4SI_type_node, NULL);
19118 void_ftype_pv4sf_v4sf_v4sf =
19119 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19120 V4SF_type_node, NULL);
19121 void_ftype_pv2di_v2di_v2di =
19122 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19123 V2DI_type_node, NULL);
19125 dreg_types[0] = V8QI_type_node;
19126 dreg_types[1] = V4HI_type_node;
19127 dreg_types[2] = V2SI_type_node;
19128 dreg_types[3] = V2SF_type_node;
19129 dreg_types[4] = neon_intDI_type_node;
19131 qreg_types[0] = V16QI_type_node;
19132 qreg_types[1] = V8HI_type_node;
19133 qreg_types[2] = V4SI_type_node;
19134 qreg_types[3] = V4SF_type_node;
19135 qreg_types[4] = V2DI_type_node;
19137 for (i = 0; i < 5; i++)
19140 for (j = 0; j < 5; j++)
19142 reinterp_ftype_dreg[i][j]
19143 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19144 reinterp_ftype_qreg[i][j]
19145 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19149 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19150 i < ARRAY_SIZE (neon_builtin_data);
19153 neon_builtin_datum *d = &neon_builtin_data[i];
19155 const char* const modenames[] = {
19156 "v8qi", "v4hi", "v2si", "v2sf", "di",
19157 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19162 int is_load = 0, is_store = 0;
19164 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19171 case NEON_LOAD1LANE:
19172 case NEON_LOADSTRUCT:
19173 case NEON_LOADSTRUCTLANE:
19175 /* Fall through. */
19177 case NEON_STORE1LANE:
19178 case NEON_STORESTRUCT:
19179 case NEON_STORESTRUCTLANE:
19182 /* Fall through. */
19185 case NEON_LOGICBINOP:
19186 case NEON_SHIFTINSERT:
19193 case NEON_SHIFTIMM:
19194 case NEON_SHIFTACC:
19200 case NEON_LANEMULL:
19201 case NEON_LANEMULH:
19203 case NEON_SCALARMUL:
19204 case NEON_SCALARMULL:
19205 case NEON_SCALARMULH:
19206 case NEON_SCALARMAC:
19212 tree return_type = void_type_node, args = void_list_node;
19214 /* Build a function type directly from the insn_data for
19215 this builtin. The build_function_type() function takes
19216 care of removing duplicates for us. */
19217 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19221 if (is_load && k == 1)
19223 /* Neon load patterns always have the memory
19224 operand in the operand 1 position. */
19225 gcc_assert (insn_data[d->code].operand[k].predicate
19226 == neon_struct_operand);
19232 eltype = const_intQI_pointer_node;
19237 eltype = const_intHI_pointer_node;
19242 eltype = const_intSI_pointer_node;
19247 eltype = const_float_pointer_node;
19252 eltype = const_intDI_pointer_node;
19255 default: gcc_unreachable ();
19258 else if (is_store && k == 0)
19260 /* Similarly, Neon store patterns use operand 0 as
19261 the memory location to store to. */
19262 gcc_assert (insn_data[d->code].operand[k].predicate
19263 == neon_struct_operand);
19269 eltype = intQI_pointer_node;
19274 eltype = intHI_pointer_node;
19279 eltype = intSI_pointer_node;
19284 eltype = float_pointer_node;
19289 eltype = intDI_pointer_node;
19292 default: gcc_unreachable ();
19297 switch (insn_data[d->code].operand[k].mode)
19299 case VOIDmode: eltype = void_type_node; break;
19301 case QImode: eltype = neon_intQI_type_node; break;
19302 case HImode: eltype = neon_intHI_type_node; break;
19303 case SImode: eltype = neon_intSI_type_node; break;
19304 case SFmode: eltype = neon_float_type_node; break;
19305 case DImode: eltype = neon_intDI_type_node; break;
19306 case TImode: eltype = intTI_type_node; break;
19307 case EImode: eltype = intEI_type_node; break;
19308 case OImode: eltype = intOI_type_node; break;
19309 case CImode: eltype = intCI_type_node; break;
19310 case XImode: eltype = intXI_type_node; break;
19311 /* 64-bit vectors. */
19312 case V8QImode: eltype = V8QI_type_node; break;
19313 case V4HImode: eltype = V4HI_type_node; break;
19314 case V2SImode: eltype = V2SI_type_node; break;
19315 case V2SFmode: eltype = V2SF_type_node; break;
19316 /* 128-bit vectors. */
19317 case V16QImode: eltype = V16QI_type_node; break;
19318 case V8HImode: eltype = V8HI_type_node; break;
19319 case V4SImode: eltype = V4SI_type_node; break;
19320 case V4SFmode: eltype = V4SF_type_node; break;
19321 case V2DImode: eltype = V2DI_type_node; break;
19322 default: gcc_unreachable ();
19326 if (k == 0 && !is_store)
19327 return_type = eltype;
19329 args = tree_cons (NULL_TREE, eltype, args);
19332 ftype = build_function_type (return_type, args);
19336 case NEON_RESULTPAIR:
19338 switch (insn_data[d->code].operand[1].mode)
19340 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19341 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19342 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19343 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19344 case DImode: ftype = void_ftype_pdi_di_di; break;
19345 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19346 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19347 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19348 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19349 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19350 default: gcc_unreachable ();
19355 case NEON_REINTERP:
19357 /* We iterate over 5 doubleword types, then 5 quadword
19359 int rhs = d->mode % 5;
19360 switch (insn_data[d->code].operand[0].mode)
19362 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19363 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19364 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19365 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19366 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19367 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19368 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19369 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19370 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19371 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19372 default: gcc_unreachable ();
19378 gcc_unreachable ();
19381 gcc_assert (ftype != NULL);
19383 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19385 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19387 arm_builtin_decls[fcode] = decl;
19391 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19394 if ((MASK) & insn_flags) \
19397 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19398 BUILT_IN_MD, NULL, NULL_TREE); \
19399 arm_builtin_decls[CODE] = bdecl; \
19404 struct builtin_description
19406 const unsigned int mask;
19407 const enum insn_code icode;
19408 const char * const name;
19409 const enum arm_builtins code;
19410 const enum rtx_code comparison;
19411 const unsigned int flag;
19414 static const struct builtin_description bdesc_2arg[] =
19416 #define IWMMXT_BUILTIN(code, string, builtin) \
19417 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19418 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19420 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19421 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19422 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19423 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19424 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19425 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19426 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19427 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19428 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19429 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19430 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19431 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19432 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19433 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19434 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19435 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19436 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19437 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19438 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19439 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19440 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19441 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19442 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19443 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19444 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19445 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19446 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19447 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19448 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19449 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19450 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19451 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19452 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19453 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19454 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19455 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19456 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19457 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19458 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19459 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19460 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19461 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19462 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19463 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19464 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19465 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19466 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19467 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19468 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19469 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19470 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19471 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19472 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19473 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19474 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19475 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19476 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
19477 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
19479 #define IWMMXT_BUILTIN2(code, builtin) \
19480 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19482 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19483 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19484 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19485 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19486 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19487 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19488 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
19489 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
19490 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
19491 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
19492 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
19493 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
19494 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
19495 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
19496 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
19497 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
19498 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
19499 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
19500 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
19501 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
19502 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
19503 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
19504 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
19505 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
19506 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
19507 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
19508 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
19509 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
19510 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
19511 IWMMXT_BUILTIN2 (rordi3, WRORDI)
19512 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19513 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19516 static const struct builtin_description bdesc_1arg[] =
19518 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19519 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19520 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19521 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19522 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19523 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19524 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19525 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19526 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19527 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19528 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19529 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19530 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19531 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19532 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19533 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19534 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19535 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19538 /* Set up all the iWMMXt builtins. This is not called if
19539 TARGET_IWMMXT is zero. */
19542 arm_init_iwmmxt_builtins (void)
19544 const struct builtin_description * d;
19547 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19548 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19549 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19552 = build_function_type_list (integer_type_node,
19553 integer_type_node, NULL_TREE);
19554 tree v8qi_ftype_v8qi_v8qi_int
19555 = build_function_type_list (V8QI_type_node,
19556 V8QI_type_node, V8QI_type_node,
19557 integer_type_node, NULL_TREE);
19558 tree v4hi_ftype_v4hi_int
19559 = build_function_type_list (V4HI_type_node,
19560 V4HI_type_node, integer_type_node, NULL_TREE);
19561 tree v2si_ftype_v2si_int
19562 = build_function_type_list (V2SI_type_node,
19563 V2SI_type_node, integer_type_node, NULL_TREE);
19564 tree v2si_ftype_di_di
19565 = build_function_type_list (V2SI_type_node,
19566 long_long_integer_type_node,
19567 long_long_integer_type_node,
19569 tree di_ftype_di_int
19570 = build_function_type_list (long_long_integer_type_node,
19571 long_long_integer_type_node,
19572 integer_type_node, NULL_TREE);
19573 tree di_ftype_di_int_int
19574 = build_function_type_list (long_long_integer_type_node,
19575 long_long_integer_type_node,
19577 integer_type_node, NULL_TREE);
19578 tree int_ftype_v8qi
19579 = build_function_type_list (integer_type_node,
19580 V8QI_type_node, NULL_TREE);
19581 tree int_ftype_v4hi
19582 = build_function_type_list (integer_type_node,
19583 V4HI_type_node, NULL_TREE);
19584 tree int_ftype_v2si
19585 = build_function_type_list (integer_type_node,
19586 V2SI_type_node, NULL_TREE);
19587 tree int_ftype_v8qi_int
19588 = build_function_type_list (integer_type_node,
19589 V8QI_type_node, integer_type_node, NULL_TREE);
19590 tree int_ftype_v4hi_int
19591 = build_function_type_list (integer_type_node,
19592 V4HI_type_node, integer_type_node, NULL_TREE);
19593 tree int_ftype_v2si_int
19594 = build_function_type_list (integer_type_node,
19595 V2SI_type_node, integer_type_node, NULL_TREE);
19596 tree v8qi_ftype_v8qi_int_int
19597 = build_function_type_list (V8QI_type_node,
19598 V8QI_type_node, integer_type_node,
19599 integer_type_node, NULL_TREE);
19600 tree v4hi_ftype_v4hi_int_int
19601 = build_function_type_list (V4HI_type_node,
19602 V4HI_type_node, integer_type_node,
19603 integer_type_node, NULL_TREE);
19604 tree v2si_ftype_v2si_int_int
19605 = build_function_type_list (V2SI_type_node,
19606 V2SI_type_node, integer_type_node,
19607 integer_type_node, NULL_TREE);
19608 /* Miscellaneous. */
19609 tree v8qi_ftype_v4hi_v4hi
19610 = build_function_type_list (V8QI_type_node,
19611 V4HI_type_node, V4HI_type_node, NULL_TREE);
19612 tree v4hi_ftype_v2si_v2si
19613 = build_function_type_list (V4HI_type_node,
19614 V2SI_type_node, V2SI_type_node, NULL_TREE);
19615 tree v2si_ftype_v4hi_v4hi
19616 = build_function_type_list (V2SI_type_node,
19617 V4HI_type_node, V4HI_type_node, NULL_TREE);
19618 tree v2si_ftype_v8qi_v8qi
19619 = build_function_type_list (V2SI_type_node,
19620 V8QI_type_node, V8QI_type_node, NULL_TREE);
19621 tree v4hi_ftype_v4hi_di
19622 = build_function_type_list (V4HI_type_node,
19623 V4HI_type_node, long_long_integer_type_node,
19625 tree v2si_ftype_v2si_di
19626 = build_function_type_list (V2SI_type_node,
19627 V2SI_type_node, long_long_integer_type_node,
19629 tree void_ftype_int_int
19630 = build_function_type_list (void_type_node,
19631 integer_type_node, integer_type_node,
19634 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19636 = build_function_type_list (long_long_integer_type_node,
19637 V8QI_type_node, NULL_TREE);
19639 = build_function_type_list (long_long_integer_type_node,
19640 V4HI_type_node, NULL_TREE);
19642 = build_function_type_list (long_long_integer_type_node,
19643 V2SI_type_node, NULL_TREE);
19644 tree v2si_ftype_v4hi
19645 = build_function_type_list (V2SI_type_node,
19646 V4HI_type_node, NULL_TREE);
19647 tree v4hi_ftype_v8qi
19648 = build_function_type_list (V4HI_type_node,
19649 V8QI_type_node, NULL_TREE);
19651 tree di_ftype_di_v4hi_v4hi
19652 = build_function_type_list (long_long_unsigned_type_node,
19653 long_long_unsigned_type_node,
19654 V4HI_type_node, V4HI_type_node,
19657 tree di_ftype_v4hi_v4hi
19658 = build_function_type_list (long_long_unsigned_type_node,
19659 V4HI_type_node,V4HI_type_node,
19662 /* Normal vector binops. */
19663 tree v8qi_ftype_v8qi_v8qi
19664 = build_function_type_list (V8QI_type_node,
19665 V8QI_type_node, V8QI_type_node, NULL_TREE);
19666 tree v4hi_ftype_v4hi_v4hi
19667 = build_function_type_list (V4HI_type_node,
19668 V4HI_type_node,V4HI_type_node, NULL_TREE);
19669 tree v2si_ftype_v2si_v2si
19670 = build_function_type_list (V2SI_type_node,
19671 V2SI_type_node, V2SI_type_node, NULL_TREE);
19672 tree di_ftype_di_di
19673 = build_function_type_list (long_long_unsigned_type_node,
19674 long_long_unsigned_type_node,
19675 long_long_unsigned_type_node,
19678 /* Add all builtins that are more or less simple operations on two
19680 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19682 /* Use one of the operands; the target can have a different mode for
19683 mask-generating compares. */
19684 enum machine_mode mode;
19690 mode = insn_data[d->icode].operand[1].mode;
19695 type = v8qi_ftype_v8qi_v8qi;
19698 type = v4hi_ftype_v4hi_v4hi;
19701 type = v2si_ftype_v2si_v2si;
19704 type = di_ftype_di_di;
19708 gcc_unreachable ();
19711 def_mbuiltin (d->mask, d->name, type, d->code);
19714 /* Add the remaining MMX insns with somewhat more complicated types. */
19715 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19716 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19717 ARM_BUILTIN_ ## CODE)
19719 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19720 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
19721 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
19723 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
19724 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
19725 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
19726 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
19727 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
19728 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
19730 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
19731 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
19732 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
19733 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
19734 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
19735 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
19737 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
19738 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
19739 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
19740 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
19741 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19742 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19744 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19745 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19746 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19747 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19748 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19749 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19751 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19753 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19754 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19755 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19756 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19758 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19759 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19760 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19761 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19762 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19763 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19764 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19765 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19766 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19768 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19769 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19770 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19772 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19773 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19774 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19776 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19777 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19778 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19779 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19780 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19781 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19783 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19784 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19785 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19786 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19787 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19788 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19789 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19790 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19791 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19792 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19793 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19794 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19796 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19797 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19798 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19799 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19801 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19802 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19803 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19804 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19805 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19806 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19807 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19809 #undef iwmmx_mbuiltin
19813 arm_init_tls_builtins (void)
19817 ftype = build_function_type (ptr_type_node, void_list_node);
19818 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
19819 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
19821 TREE_NOTHROW (decl) = 1;
19822 TREE_READONLY (decl) = 1;
19823 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
19827 arm_init_fp16_builtins (void)
19829 tree fp16_type = make_node (REAL_TYPE);
19830 TYPE_PRECISION (fp16_type) = 16;
19831 layout_type (fp16_type);
19832 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19836 arm_init_builtins (void)
19838 arm_init_tls_builtins ();
19840 if (TARGET_REALLY_IWMMXT)
19841 arm_init_iwmmxt_builtins ();
19844 arm_init_neon_builtins ();
19846 if (arm_fp16_format)
19847 arm_init_fp16_builtins ();
19850 /* Return the ARM builtin for CODE. */
19853 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
19855 if (code >= ARM_BUILTIN_MAX)
19856 return error_mark_node;
19858 return arm_builtin_decls[code];
19861 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19863 static const char *
19864 arm_invalid_parameter_type (const_tree t)
19866 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19867 return N_("function parameters cannot have __fp16 type");
19871 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19873 static const char *
19874 arm_invalid_return_type (const_tree t)
19876 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19877 return N_("functions cannot return __fp16 type");
19881 /* Implement TARGET_PROMOTED_TYPE. */
19884 arm_promoted_type (const_tree t)
19886 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19887 return float_type_node;
19891 /* Implement TARGET_CONVERT_TO_TYPE.
19892 Specifically, this hook implements the peculiarity of the ARM
19893 half-precision floating-point C semantics that requires conversions between
19894 __fp16 to or from double to do an intermediate conversion to float. */
19897 arm_convert_to_type (tree type, tree expr)
19899 tree fromtype = TREE_TYPE (expr);
19900 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19902 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19903 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19904 return convert (type, convert (float_type_node, expr));
19908 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19909 This simply adds HFmode as a supported mode; even though we don't
19910 implement arithmetic on this type directly, it's supported by
19911 optabs conversions, much the way the double-word arithmetic is
19912 special-cased in the default hook. */
19915 arm_scalar_mode_supported_p (enum machine_mode mode)
19917 if (mode == HFmode)
19918 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19919 else if (ALL_FIXED_POINT_MODE_P (mode))
19922 return default_scalar_mode_supported_p (mode);
19925 /* Errors in the source file can cause expand_expr to return const0_rtx
19926 where we expect a vector. To avoid crashing, use one of the vector
19927 clear instructions. */
19930 safe_vector_operand (rtx x, enum machine_mode mode)
19932 if (x != const0_rtx)
19934 x = gen_reg_rtx (mode);
19936 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19937 : gen_rtx_SUBREG (DImode, x, 0)));
19941 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19944 arm_expand_binop_builtin (enum insn_code icode,
19945 tree exp, rtx target)
19948 tree arg0 = CALL_EXPR_ARG (exp, 0);
19949 tree arg1 = CALL_EXPR_ARG (exp, 1);
19950 rtx op0 = expand_normal (arg0);
19951 rtx op1 = expand_normal (arg1);
19952 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19953 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19954 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19956 if (VECTOR_MODE_P (mode0))
19957 op0 = safe_vector_operand (op0, mode0);
19958 if (VECTOR_MODE_P (mode1))
19959 op1 = safe_vector_operand (op1, mode1);
19962 || GET_MODE (target) != tmode
19963 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19964 target = gen_reg_rtx (tmode);
19966 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19968 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19969 op0 = copy_to_mode_reg (mode0, op0);
19970 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19971 op1 = copy_to_mode_reg (mode1, op1);
19973 pat = GEN_FCN (icode) (target, op0, op1);
19980 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19983 arm_expand_unop_builtin (enum insn_code icode,
19984 tree exp, rtx target, int do_load)
19987 tree arg0 = CALL_EXPR_ARG (exp, 0);
19988 rtx op0 = expand_normal (arg0);
19989 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19990 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19993 || GET_MODE (target) != tmode
19994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19995 target = gen_reg_rtx (tmode);
19997 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20000 if (VECTOR_MODE_P (mode0))
20001 op0 = safe_vector_operand (op0, mode0);
20003 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20004 op0 = copy_to_mode_reg (mode0, op0);
20007 pat = GEN_FCN (icode) (target, op0);
20015 NEON_ARG_COPY_TO_REG,
20021 #define NEON_MAX_BUILTIN_ARGS 5
20023 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20024 and return an expression for the accessed memory.
20026 The intrinsic function operates on a block of registers that has
20027 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20028 The function references the memory at EXP in mode MEM_MODE;
20029 this mode may be BLKmode if no more suitable mode is available. */
20032 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20033 enum machine_mode reg_mode,
20034 neon_builtin_type_mode type_mode)
20036 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20037 tree elem_type, upper_bound, array_type;
20039 /* Work out the size of the register block in bytes. */
20040 reg_size = GET_MODE_SIZE (reg_mode);
20042 /* Work out the size of each vector in bytes. */
20043 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20044 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20046 /* Work out how many vectors there are. */
20047 gcc_assert (reg_size % vector_size == 0);
20048 nvectors = reg_size / vector_size;
20050 /* Work out how many elements are being loaded or stored.
20051 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20052 and memory elements; anything else implies a lane load or store. */
20053 if (mem_mode == reg_mode)
20054 nelems = vector_size * nvectors;
20058 /* Work out the type of each element. */
20059 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20060 elem_type = TREE_TYPE (TREE_TYPE (exp));
20062 /* Create a type that describes the full access. */
20063 upper_bound = build_int_cst (size_type_node, nelems - 1);
20064 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20066 /* Dereference EXP using that type. */
20067 exp = convert (build_pointer_type (array_type), exp);
20068 return fold_build2 (MEM_REF, array_type, exp,
20069 build_int_cst (TREE_TYPE (exp), 0));
20072 /* Expand a Neon builtin. */
20074 arm_expand_neon_args (rtx target, int icode, int have_retval,
20075 neon_builtin_type_mode type_mode,
20080 tree arg[NEON_MAX_BUILTIN_ARGS];
20081 rtx op[NEON_MAX_BUILTIN_ARGS];
20082 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20083 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20084 enum machine_mode other_mode;
20090 || GET_MODE (target) != tmode
20091 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20092 target = gen_reg_rtx (tmode);
20094 va_start (ap, exp);
20098 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20100 if (thisarg == NEON_ARG_STOP)
20104 opno = argc + have_retval;
20105 mode[argc] = insn_data[icode].operand[opno].mode;
20106 arg[argc] = CALL_EXPR_ARG (exp, argc);
20107 if (thisarg == NEON_ARG_MEMORY)
20109 other_mode = insn_data[icode].operand[1 - opno].mode;
20110 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20111 other_mode, type_mode);
20113 op[argc] = expand_normal (arg[argc]);
20117 case NEON_ARG_COPY_TO_REG:
20118 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20119 if (!(*insn_data[icode].operand[opno].predicate)
20120 (op[argc], mode[argc]))
20121 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20124 case NEON_ARG_CONSTANT:
20125 /* FIXME: This error message is somewhat unhelpful. */
20126 if (!(*insn_data[icode].operand[opno].predicate)
20127 (op[argc], mode[argc]))
20128 error ("argument must be a constant");
20131 case NEON_ARG_MEMORY:
20132 gcc_assert (MEM_P (op[argc]));
20133 PUT_MODE (op[argc], mode[argc]);
20134 /* ??? arm_neon.h uses the same built-in functions for signed
20135 and unsigned accesses, casting where necessary. This isn't
20137 set_mem_alias_set (op[argc], 0);
20138 if (!(*insn_data[icode].operand[opno].predicate)
20139 (op[argc], mode[argc]))
20140 op[argc] = (replace_equiv_address
20141 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20144 case NEON_ARG_STOP:
20145 gcc_unreachable ();
20158 pat = GEN_FCN (icode) (target, op[0]);
20162 pat = GEN_FCN (icode) (target, op[0], op[1]);
20166 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20170 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20174 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20178 gcc_unreachable ();
20184 pat = GEN_FCN (icode) (op[0]);
20188 pat = GEN_FCN (icode) (op[0], op[1]);
20192 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20196 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20200 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20204 gcc_unreachable ();
20215 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20216 constants defined per-instruction or per instruction-variant. Instead, the
20217 required info is looked up in the table neon_builtin_data. */
20219 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20221 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20222 neon_itype itype = d->itype;
20223 enum insn_code icode = d->code;
20224 neon_builtin_type_mode type_mode = d->mode;
20231 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20232 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20236 case NEON_SCALARMUL:
20237 case NEON_SCALARMULL:
20238 case NEON_SCALARMULH:
20239 case NEON_SHIFTINSERT:
20240 case NEON_LOGICBINOP:
20241 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20242 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20246 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20247 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20248 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20252 case NEON_SHIFTIMM:
20253 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20254 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20258 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20259 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20263 case NEON_REINTERP:
20264 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20265 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20269 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20270 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20272 case NEON_RESULTPAIR:
20273 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20274 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20278 case NEON_LANEMULL:
20279 case NEON_LANEMULH:
20280 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20281 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20282 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20285 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20286 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20287 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20289 case NEON_SHIFTACC:
20290 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20291 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20292 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20294 case NEON_SCALARMAC:
20295 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20296 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20297 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20301 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20302 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20306 case NEON_LOADSTRUCT:
20307 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20308 NEON_ARG_MEMORY, NEON_ARG_STOP);
20310 case NEON_LOAD1LANE:
20311 case NEON_LOADSTRUCTLANE:
20312 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20313 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20317 case NEON_STORESTRUCT:
20318 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20319 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20321 case NEON_STORE1LANE:
20322 case NEON_STORESTRUCTLANE:
20323 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20324 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20328 gcc_unreachable ();
20331 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20333 neon_reinterpret (rtx dest, rtx src)
20335 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20338 /* Emit code to place a Neon pair result in memory locations (with equal
20341 neon_emit_pair_result_insn (enum machine_mode mode,
20342 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20345 rtx mem = gen_rtx_MEM (mode, destaddr);
20346 rtx tmp1 = gen_reg_rtx (mode);
20347 rtx tmp2 = gen_reg_rtx (mode);
20349 emit_insn (intfn (tmp1, op1, op2, tmp2));
20351 emit_move_insn (mem, tmp1);
20352 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20353 emit_move_insn (mem, tmp2);
20356 /* Set up operands for a register copy from src to dest, taking care not to
20357 clobber registers in the process.
20358 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
20359 be called with a large N, so that should be OK. */
20362 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20364 unsigned int copied = 0, opctr = 0;
20365 unsigned int done = (1 << count) - 1;
20368 while (copied != done)
20370 for (i = 0; i < count; i++)
20374 for (j = 0; good && j < count; j++)
20375 if (i != j && (copied & (1 << j)) == 0
20376 && reg_overlap_mentioned_p (src[j], dest[i]))
20381 operands[opctr++] = dest[i];
20382 operands[opctr++] = src[i];
20388 gcc_assert (opctr == count * 2);
20391 /* Expand an expression EXP that calls a built-in function,
20392 with result going to TARGET if that's convenient
20393 (and in mode MODE if that's convenient).
20394 SUBTARGET may be used as the target for computing one of EXP's operands.
20395 IGNORE is nonzero if the value is to be ignored. */
20398 arm_expand_builtin (tree exp,
20400 rtx subtarget ATTRIBUTE_UNUSED,
20401 enum machine_mode mode ATTRIBUTE_UNUSED,
20402 int ignore ATTRIBUTE_UNUSED)
20404 const struct builtin_description * d;
20405 enum insn_code icode;
20406 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20414 int fcode = DECL_FUNCTION_CODE (fndecl);
20416 enum machine_mode tmode;
20417 enum machine_mode mode0;
20418 enum machine_mode mode1;
20419 enum machine_mode mode2;
20421 if (fcode >= ARM_BUILTIN_NEON_BASE)
20422 return arm_expand_neon_builtin (fcode, exp, target);
20426 case ARM_BUILTIN_TEXTRMSB:
20427 case ARM_BUILTIN_TEXTRMUB:
20428 case ARM_BUILTIN_TEXTRMSH:
20429 case ARM_BUILTIN_TEXTRMUH:
20430 case ARM_BUILTIN_TEXTRMSW:
20431 case ARM_BUILTIN_TEXTRMUW:
20432 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20433 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20434 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20435 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20436 : CODE_FOR_iwmmxt_textrmw);
20438 arg0 = CALL_EXPR_ARG (exp, 0);
20439 arg1 = CALL_EXPR_ARG (exp, 1);
20440 op0 = expand_normal (arg0);
20441 op1 = expand_normal (arg1);
20442 tmode = insn_data[icode].operand[0].mode;
20443 mode0 = insn_data[icode].operand[1].mode;
20444 mode1 = insn_data[icode].operand[2].mode;
20446 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20447 op0 = copy_to_mode_reg (mode0, op0);
20448 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20450 /* @@@ better error message */
20451 error ("selector must be an immediate");
20452 return gen_reg_rtx (tmode);
20455 || GET_MODE (target) != tmode
20456 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20457 target = gen_reg_rtx (tmode);
20458 pat = GEN_FCN (icode) (target, op0, op1);
20464 case ARM_BUILTIN_TINSRB:
20465 case ARM_BUILTIN_TINSRH:
20466 case ARM_BUILTIN_TINSRW:
20467 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20468 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20469 : CODE_FOR_iwmmxt_tinsrw);
20470 arg0 = CALL_EXPR_ARG (exp, 0);
20471 arg1 = CALL_EXPR_ARG (exp, 1);
20472 arg2 = CALL_EXPR_ARG (exp, 2);
20473 op0 = expand_normal (arg0);
20474 op1 = expand_normal (arg1);
20475 op2 = expand_normal (arg2);
20476 tmode = insn_data[icode].operand[0].mode;
20477 mode0 = insn_data[icode].operand[1].mode;
20478 mode1 = insn_data[icode].operand[2].mode;
20479 mode2 = insn_data[icode].operand[3].mode;
20481 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20482 op0 = copy_to_mode_reg (mode0, op0);
20483 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20484 op1 = copy_to_mode_reg (mode1, op1);
20485 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20487 /* @@@ better error message */
20488 error ("selector must be an immediate");
20492 || GET_MODE (target) != tmode
20493 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20494 target = gen_reg_rtx (tmode);
20495 pat = GEN_FCN (icode) (target, op0, op1, op2);
20501 case ARM_BUILTIN_SETWCX:
20502 arg0 = CALL_EXPR_ARG (exp, 0);
20503 arg1 = CALL_EXPR_ARG (exp, 1);
20504 op0 = force_reg (SImode, expand_normal (arg0));
20505 op1 = expand_normal (arg1);
20506 emit_insn (gen_iwmmxt_tmcr (op1, op0));
20509 case ARM_BUILTIN_GETWCX:
20510 arg0 = CALL_EXPR_ARG (exp, 0);
20511 op0 = expand_normal (arg0);
20512 target = gen_reg_rtx (SImode);
20513 emit_insn (gen_iwmmxt_tmrc (target, op0));
20516 case ARM_BUILTIN_WSHUFH:
20517 icode = CODE_FOR_iwmmxt_wshufh;
20518 arg0 = CALL_EXPR_ARG (exp, 0);
20519 arg1 = CALL_EXPR_ARG (exp, 1);
20520 op0 = expand_normal (arg0);
20521 op1 = expand_normal (arg1);
20522 tmode = insn_data[icode].operand[0].mode;
20523 mode1 = insn_data[icode].operand[1].mode;
20524 mode2 = insn_data[icode].operand[2].mode;
20526 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20527 op0 = copy_to_mode_reg (mode1, op0);
20528 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20530 /* @@@ better error message */
20531 error ("mask must be an immediate");
20535 || GET_MODE (target) != tmode
20536 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20537 target = gen_reg_rtx (tmode);
20538 pat = GEN_FCN (icode) (target, op0, op1);
20544 case ARM_BUILTIN_WSADB:
20545 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
20546 case ARM_BUILTIN_WSADH:
20547 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
20548 case ARM_BUILTIN_WSADBZ:
20549 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
20550 case ARM_BUILTIN_WSADHZ:
20551 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
20553 /* Several three-argument builtins. */
20554 case ARM_BUILTIN_WMACS:
20555 case ARM_BUILTIN_WMACU:
20556 case ARM_BUILTIN_WALIGN:
20557 case ARM_BUILTIN_TMIA:
20558 case ARM_BUILTIN_TMIAPH:
20559 case ARM_BUILTIN_TMIATT:
20560 case ARM_BUILTIN_TMIATB:
20561 case ARM_BUILTIN_TMIABT:
20562 case ARM_BUILTIN_TMIABB:
20563 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
20564 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
20565 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
20566 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
20567 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
20568 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
20569 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
20570 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
20571 : CODE_FOR_iwmmxt_walign);
20572 arg0 = CALL_EXPR_ARG (exp, 0);
20573 arg1 = CALL_EXPR_ARG (exp, 1);
20574 arg2 = CALL_EXPR_ARG (exp, 2);
20575 op0 = expand_normal (arg0);
20576 op1 = expand_normal (arg1);
20577 op2 = expand_normal (arg2);
20578 tmode = insn_data[icode].operand[0].mode;
20579 mode0 = insn_data[icode].operand[1].mode;
20580 mode1 = insn_data[icode].operand[2].mode;
20581 mode2 = insn_data[icode].operand[3].mode;
20583 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20584 op0 = copy_to_mode_reg (mode0, op0);
20585 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20586 op1 = copy_to_mode_reg (mode1, op1);
20587 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20588 op2 = copy_to_mode_reg (mode2, op2);
20590 || GET_MODE (target) != tmode
20591 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20592 target = gen_reg_rtx (tmode);
20593 pat = GEN_FCN (icode) (target, op0, op1, op2);
20599 case ARM_BUILTIN_WZERO:
20600 target = gen_reg_rtx (DImode);
20601 emit_insn (gen_iwmmxt_clrdi (target));
20604 case ARM_BUILTIN_THREAD_POINTER:
20605 return arm_load_tp (target);
20611 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20612 if (d->code == (const enum arm_builtins) fcode)
20613 return arm_expand_binop_builtin (d->icode, exp, target);
20615 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
20616 if (d->code == (const enum arm_builtins) fcode)
20617 return arm_expand_unop_builtin (d->icode, exp, target, 0);
20619 /* @@@ Should really do something sensible here. */
20623 /* Return the number (counting from 0) of
20624 the least significant set bit in MASK. */
20627 number_of_first_bit_set (unsigned mask)
20629 return ctz_hwi (mask);
20632 /* Like emit_multi_reg_push, but allowing for a different set of
20633 registers to be described as saved. MASK is the set of registers
20634 to be saved; REAL_REGS is the set of registers to be described as
20635 saved. If REAL_REGS is 0, only describe the stack adjustment. */
20638 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
20640 unsigned long regno;
20641 rtx par[10], tmp, reg, insn;
20644 /* Build the parallel of the registers actually being stored. */
20645 for (i = 0; mask; ++i, mask &= mask - 1)
20647 regno = ctz_hwi (mask);
20648 reg = gen_rtx_REG (SImode, regno);
20651 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
20653 tmp = gen_rtx_USE (VOIDmode, reg);
20658 tmp = plus_constant (stack_pointer_rtx, -4 * i);
20659 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20660 tmp = gen_frame_mem (BLKmode, tmp);
20661 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
20664 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
20665 insn = emit_insn (tmp);
20667 /* Always build the stack adjustment note for unwind info. */
20668 tmp = plus_constant (stack_pointer_rtx, -4 * i);
20669 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
20672 /* Build the parallel of the registers recorded as saved for unwind. */
20673 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
20675 regno = ctz_hwi (real_regs);
20676 reg = gen_rtx_REG (SImode, regno);
20678 tmp = plus_constant (stack_pointer_rtx, j * 4);
20679 tmp = gen_frame_mem (SImode, tmp);
20680 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
20681 RTX_FRAME_RELATED_P (tmp) = 1;
20689 RTX_FRAME_RELATED_P (par[0]) = 1;
20690 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
20693 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
20698 /* Emit code to push or pop registers to or from the stack. F is the
20699 assembly file. MASK is the registers to pop. */
20701 thumb_pop (FILE *f, unsigned long mask)
20704 int lo_mask = mask & 0xFF;
20705 int pushed_words = 0;
20709 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
20711 /* Special case. Do not generate a POP PC statement here, do it in
20713 thumb_exit (f, -1);
20717 fprintf (f, "\tpop\t{");
20719 /* Look at the low registers first. */
20720 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
20724 asm_fprintf (f, "%r", regno);
20726 if ((lo_mask & ~1) != 0)
20733 if (mask & (1 << PC_REGNUM))
20735 /* Catch popping the PC. */
20736 if (TARGET_INTERWORK || TARGET_BACKTRACE
20737 || crtl->calls_eh_return)
20739 /* The PC is never poped directly, instead
20740 it is popped into r3 and then BX is used. */
20741 fprintf (f, "}\n");
20743 thumb_exit (f, -1);
20752 asm_fprintf (f, "%r", PC_REGNUM);
20756 fprintf (f, "}\n");
20759 /* Generate code to return from a thumb function.
20760 If 'reg_containing_return_addr' is -1, then the return address is
20761 actually on the stack, at the stack pointer. */
20763 thumb_exit (FILE *f, int reg_containing_return_addr)
20765 unsigned regs_available_for_popping;
20766 unsigned regs_to_pop;
20768 unsigned available;
20772 int restore_a4 = FALSE;
20774 /* Compute the registers we need to pop. */
20778 if (reg_containing_return_addr == -1)
20780 regs_to_pop |= 1 << LR_REGNUM;
20784 if (TARGET_BACKTRACE)
20786 /* Restore the (ARM) frame pointer and stack pointer. */
20787 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20791 /* If there is nothing to pop then just emit the BX instruction and
20793 if (pops_needed == 0)
20795 if (crtl->calls_eh_return)
20796 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20798 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20801 /* Otherwise if we are not supporting interworking and we have not created
20802 a backtrace structure and the function was not entered in ARM mode then
20803 just pop the return address straight into the PC. */
20804 else if (!TARGET_INTERWORK
20805 && !TARGET_BACKTRACE
20806 && !is_called_in_ARM_mode (current_function_decl)
20807 && !crtl->calls_eh_return)
20809 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20813 /* Find out how many of the (return) argument registers we can corrupt. */
20814 regs_available_for_popping = 0;
20816 /* If returning via __builtin_eh_return, the bottom three registers
20817 all contain information needed for the return. */
20818 if (crtl->calls_eh_return)
20822 /* If we can deduce the registers used from the function's
20823 return value. This is more reliable that examining
20824 df_regs_ever_live_p () because that will be set if the register is
20825 ever used in the function, not just if the register is used
20826 to hold a return value. */
20828 if (crtl->return_rtx != 0)
20829 mode = GET_MODE (crtl->return_rtx);
20831 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20833 size = GET_MODE_SIZE (mode);
20837 /* In a void function we can use any argument register.
20838 In a function that returns a structure on the stack
20839 we can use the second and third argument registers. */
20840 if (mode == VOIDmode)
20841 regs_available_for_popping =
20842 (1 << ARG_REGISTER (1))
20843 | (1 << ARG_REGISTER (2))
20844 | (1 << ARG_REGISTER (3));
20846 regs_available_for_popping =
20847 (1 << ARG_REGISTER (2))
20848 | (1 << ARG_REGISTER (3));
20850 else if (size <= 4)
20851 regs_available_for_popping =
20852 (1 << ARG_REGISTER (2))
20853 | (1 << ARG_REGISTER (3));
20854 else if (size <= 8)
20855 regs_available_for_popping =
20856 (1 << ARG_REGISTER (3));
20859 /* Match registers to be popped with registers into which we pop them. */
20860 for (available = regs_available_for_popping,
20861 required = regs_to_pop;
20862 required != 0 && available != 0;
20863 available &= ~(available & - available),
20864 required &= ~(required & - required))
20867 /* If we have any popping registers left over, remove them. */
20869 regs_available_for_popping &= ~available;
20871 /* Otherwise if we need another popping register we can use
20872 the fourth argument register. */
20873 else if (pops_needed)
20875 /* If we have not found any free argument registers and
20876 reg a4 contains the return address, we must move it. */
20877 if (regs_available_for_popping == 0
20878 && reg_containing_return_addr == LAST_ARG_REGNUM)
20880 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20881 reg_containing_return_addr = LR_REGNUM;
20883 else if (size > 12)
20885 /* Register a4 is being used to hold part of the return value,
20886 but we have dire need of a free, low register. */
20889 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
20892 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20894 /* The fourth argument register is available. */
20895 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20901 /* Pop as many registers as we can. */
20902 thumb_pop (f, regs_available_for_popping);
20904 /* Process the registers we popped. */
20905 if (reg_containing_return_addr == -1)
20907 /* The return address was popped into the lowest numbered register. */
20908 regs_to_pop &= ~(1 << LR_REGNUM);
20910 reg_containing_return_addr =
20911 number_of_first_bit_set (regs_available_for_popping);
20913 /* Remove this register for the mask of available registers, so that
20914 the return address will not be corrupted by further pops. */
20915 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20918 /* If we popped other registers then handle them here. */
20919 if (regs_available_for_popping)
20923 /* Work out which register currently contains the frame pointer. */
20924 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20926 /* Move it into the correct place. */
20927 asm_fprintf (f, "\tmov\t%r, %r\n",
20928 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20930 /* (Temporarily) remove it from the mask of popped registers. */
20931 regs_available_for_popping &= ~(1 << frame_pointer);
20932 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20934 if (regs_available_for_popping)
20938 /* We popped the stack pointer as well,
20939 find the register that contains it. */
20940 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20942 /* Move it into the stack register. */
20943 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20945 /* At this point we have popped all necessary registers, so
20946 do not worry about restoring regs_available_for_popping
20947 to its correct value:
20949 assert (pops_needed == 0)
20950 assert (regs_available_for_popping == (1 << frame_pointer))
20951 assert (regs_to_pop == (1 << STACK_POINTER)) */
20955 /* Since we have just move the popped value into the frame
20956 pointer, the popping register is available for reuse, and
20957 we know that we still have the stack pointer left to pop. */
20958 regs_available_for_popping |= (1 << frame_pointer);
20962 /* If we still have registers left on the stack, but we no longer have
20963 any registers into which we can pop them, then we must move the return
20964 address into the link register and make available the register that
20966 if (regs_available_for_popping == 0 && pops_needed > 0)
20968 regs_available_for_popping |= 1 << reg_containing_return_addr;
20970 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20971 reg_containing_return_addr);
20973 reg_containing_return_addr = LR_REGNUM;
20976 /* If we have registers left on the stack then pop some more.
20977 We know that at most we will want to pop FP and SP. */
20978 if (pops_needed > 0)
20983 thumb_pop (f, regs_available_for_popping);
20985 /* We have popped either FP or SP.
20986 Move whichever one it is into the correct register. */
20987 popped_into = number_of_first_bit_set (regs_available_for_popping);
20988 move_to = number_of_first_bit_set (regs_to_pop);
20990 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20992 regs_to_pop &= ~(1 << move_to);
20997 /* If we still have not popped everything then we must have only
20998 had one register available to us and we are now popping the SP. */
20999 if (pops_needed > 0)
21003 thumb_pop (f, regs_available_for_popping);
21005 popped_into = number_of_first_bit_set (regs_available_for_popping);
21007 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21009 assert (regs_to_pop == (1 << STACK_POINTER))
21010 assert (pops_needed == 1)
21014 /* If necessary restore the a4 register. */
21017 if (reg_containing_return_addr != LR_REGNUM)
21019 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21020 reg_containing_return_addr = LR_REGNUM;
21023 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21026 if (crtl->calls_eh_return)
21027 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21029 /* Return to caller. */
21030 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21033 /* Scan INSN just before assembler is output for it.
21034 For Thumb-1, we track the status of the condition codes; this
21035 information is used in the cbranchsi4_insn pattern. */
21037 thumb1_final_prescan_insn (rtx insn)
21039 if (flag_print_asm_name)
21040 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21041 INSN_ADDRESSES (INSN_UID (insn)));
21042 /* Don't overwrite the previous setter when we get to a cbranch. */
21043 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21045 enum attr_conds conds;
21047 if (cfun->machine->thumb1_cc_insn)
21049 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21050 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21053 conds = get_attr_conds (insn);
21054 if (conds == CONDS_SET)
21056 rtx set = single_set (insn);
21057 cfun->machine->thumb1_cc_insn = insn;
21058 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21059 cfun->machine->thumb1_cc_op1 = const0_rtx;
21060 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21061 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21063 rtx src1 = XEXP (SET_SRC (set), 1);
21064 if (src1 == const0_rtx)
21065 cfun->machine->thumb1_cc_mode = CCmode;
21068 else if (conds != CONDS_NOCOND)
21069 cfun->machine->thumb1_cc_insn = NULL_RTX;
21074 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21076 unsigned HOST_WIDE_INT mask = 0xff;
21079 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21080 if (val == 0) /* XXX */
21083 for (i = 0; i < 25; i++)
21084 if ((val & (mask << i)) == val)
21090 /* Returns nonzero if the current function contains,
21091 or might contain a far jump. */
21093 thumb_far_jump_used_p (void)
21097 /* This test is only important for leaf functions. */
21098 /* assert (!leaf_function_p ()); */
21100 /* If we have already decided that far jumps may be used,
21101 do not bother checking again, and always return true even if
21102 it turns out that they are not being used. Once we have made
21103 the decision that far jumps are present (and that hence the link
21104 register will be pushed onto the stack) we cannot go back on it. */
21105 if (cfun->machine->far_jump_used)
21108 /* If this function is not being called from the prologue/epilogue
21109 generation code then it must be being called from the
21110 INITIAL_ELIMINATION_OFFSET macro. */
21111 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21113 /* In this case we know that we are being asked about the elimination
21114 of the arg pointer register. If that register is not being used,
21115 then there are no arguments on the stack, and we do not have to
21116 worry that a far jump might force the prologue to push the link
21117 register, changing the stack offsets. In this case we can just
21118 return false, since the presence of far jumps in the function will
21119 not affect stack offsets.
21121 If the arg pointer is live (or if it was live, but has now been
21122 eliminated and so set to dead) then we do have to test to see if
21123 the function might contain a far jump. This test can lead to some
21124 false negatives, since before reload is completed, then length of
21125 branch instructions is not known, so gcc defaults to returning their
21126 longest length, which in turn sets the far jump attribute to true.
21128 A false negative will not result in bad code being generated, but it
21129 will result in a needless push and pop of the link register. We
21130 hope that this does not occur too often.
21132 If we need doubleword stack alignment this could affect the other
21133 elimination offsets so we can't risk getting it wrong. */
21134 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21135 cfun->machine->arg_pointer_live = 1;
21136 else if (!cfun->machine->arg_pointer_live)
21140 /* Check to see if the function contains a branch
21141 insn with the far jump attribute set. */
21142 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21144 if (GET_CODE (insn) == JUMP_INSN
21145 /* Ignore tablejump patterns. */
21146 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21147 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21148 && get_attr_far_jump (insn) == FAR_JUMP_YES
21151 /* Record the fact that we have decided that
21152 the function does use far jumps. */
21153 cfun->machine->far_jump_used = 1;
21161 /* Return nonzero if FUNC must be entered in ARM mode. */
21163 is_called_in_ARM_mode (tree func)
21165 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21167 /* Ignore the problem about functions whose address is taken. */
21168 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21172 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21178 /* Given the stack offsets and register mask in OFFSETS, decide how
21179 many additional registers to push instead of subtracting a constant
21180 from SP. For epilogues the principle is the same except we use pop.
21181 FOR_PROLOGUE indicates which we're generating. */
21183 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21185 HOST_WIDE_INT amount;
21186 unsigned long live_regs_mask = offsets->saved_regs_mask;
21187 /* Extract a mask of the ones we can give to the Thumb's push/pop
21189 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21190 /* Then count how many other high registers will need to be pushed. */
21191 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21192 int n_free, reg_base;
21194 if (!for_prologue && frame_pointer_needed)
21195 amount = offsets->locals_base - offsets->saved_regs;
21197 amount = offsets->outgoing_args - offsets->saved_regs;
21199 /* If the stack frame size is 512 exactly, we can save one load
21200 instruction, which should make this a win even when optimizing
21202 if (!optimize_size && amount != 512)
21205 /* Can't do this if there are high registers to push. */
21206 if (high_regs_pushed != 0)
21209 /* Shouldn't do it in the prologue if no registers would normally
21210 be pushed at all. In the epilogue, also allow it if we'll have
21211 a pop insn for the PC. */
21214 || TARGET_BACKTRACE
21215 || (live_regs_mask & 1 << LR_REGNUM) == 0
21216 || TARGET_INTERWORK
21217 || crtl->args.pretend_args_size != 0))
21220 /* Don't do this if thumb_expand_prologue wants to emit instructions
21221 between the push and the stack frame allocation. */
21223 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21224 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21231 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21232 live_regs_mask >>= reg_base;
21235 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21236 && (for_prologue || call_used_regs[reg_base + n_free]))
21238 live_regs_mask >>= 1;
21244 gcc_assert (amount / 4 * 4 == amount);
21246 if (amount >= 512 && (amount - n_free * 4) < 512)
21247 return (amount - 508) / 4;
21248 if (amount <= n_free * 4)
21253 /* The bits which aren't usefully expanded as rtl. */
21255 thumb_unexpanded_epilogue (void)
21257 arm_stack_offsets *offsets;
21259 unsigned long live_regs_mask = 0;
21260 int high_regs_pushed = 0;
21262 int had_to_push_lr;
21265 if (cfun->machine->return_used_this_function != 0)
21268 if (IS_NAKED (arm_current_func_type ()))
21271 offsets = arm_get_frame_offsets ();
21272 live_regs_mask = offsets->saved_regs_mask;
21273 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21275 /* If we can deduce the registers used from the function's return value.
21276 This is more reliable that examining df_regs_ever_live_p () because that
21277 will be set if the register is ever used in the function, not just if
21278 the register is used to hold a return value. */
21279 size = arm_size_return_regs ();
21281 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21284 unsigned long extra_mask = (1 << extra_pop) - 1;
21285 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
21288 /* The prolog may have pushed some high registers to use as
21289 work registers. e.g. the testsuite file:
21290 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21291 compiles to produce:
21292 push {r4, r5, r6, r7, lr}
21296 as part of the prolog. We have to undo that pushing here. */
21298 if (high_regs_pushed)
21300 unsigned long mask = live_regs_mask & 0xff;
21303 /* The available low registers depend on the size of the value we are
21311 /* Oh dear! We have no low registers into which we can pop
21314 ("no low registers available for popping high registers");
21316 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21317 if (live_regs_mask & (1 << next_hi_reg))
21320 while (high_regs_pushed)
21322 /* Find lo register(s) into which the high register(s) can
21324 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21326 if (mask & (1 << regno))
21327 high_regs_pushed--;
21328 if (high_regs_pushed == 0)
21332 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
21334 /* Pop the values into the low register(s). */
21335 thumb_pop (asm_out_file, mask);
21337 /* Move the value(s) into the high registers. */
21338 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21340 if (mask & (1 << regno))
21342 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
21345 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
21346 if (live_regs_mask & (1 << next_hi_reg))
21351 live_regs_mask &= ~0x0f00;
21354 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
21355 live_regs_mask &= 0xff;
21357 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
21359 /* Pop the return address into the PC. */
21360 if (had_to_push_lr)
21361 live_regs_mask |= 1 << PC_REGNUM;
21363 /* Either no argument registers were pushed or a backtrace
21364 structure was created which includes an adjusted stack
21365 pointer, so just pop everything. */
21366 if (live_regs_mask)
21367 thumb_pop (asm_out_file, live_regs_mask);
21369 /* We have either just popped the return address into the
21370 PC or it is was kept in LR for the entire function.
21371 Note that thumb_pop has already called thumb_exit if the
21372 PC was in the list. */
21373 if (!had_to_push_lr)
21374 thumb_exit (asm_out_file, LR_REGNUM);
21378 /* Pop everything but the return address. */
21379 if (live_regs_mask)
21380 thumb_pop (asm_out_file, live_regs_mask);
21382 if (had_to_push_lr)
21386 /* We have no free low regs, so save one. */
21387 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
21391 /* Get the return address into a temporary register. */
21392 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
21396 /* Move the return address to lr. */
21397 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
21399 /* Restore the low register. */
21400 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
21405 regno = LAST_ARG_REGNUM;
21410 /* Remove the argument registers that were pushed onto the stack. */
21411 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
21412 SP_REGNUM, SP_REGNUM,
21413 crtl->args.pretend_args_size);
21415 thumb_exit (asm_out_file, regno);
21421 /* Functions to save and restore machine-specific function data. */
21422 static struct machine_function *
21423 arm_init_machine_status (void)
21425 struct machine_function *machine;
21426 machine = ggc_alloc_cleared_machine_function ();
21428 #if ARM_FT_UNKNOWN != 0
21429 machine->func_type = ARM_FT_UNKNOWN;
21434 /* Return an RTX indicating where the return address to the
21435 calling function can be found. */
21437 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
21442 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
21445 /* Do anything needed before RTL is emitted for each function. */
21447 arm_init_expanders (void)
21449 /* Arrange to initialize and mark the machine per-function status. */
21450 init_machine_status = arm_init_machine_status;
21452 /* This is to stop the combine pass optimizing away the alignment
21453 adjustment of va_arg. */
21454 /* ??? It is claimed that this should not be necessary. */
21456 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
21460 /* Like arm_compute_initial_elimination offset. Simpler because there
21461 isn't an ABI specified frame pointer for Thumb. Instead, we set it
21462 to point at the base of the local variables after static stack
21463 space for a function has been allocated. */
21466 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21468 arm_stack_offsets *offsets;
21470 offsets = arm_get_frame_offsets ();
21474 case ARG_POINTER_REGNUM:
21477 case STACK_POINTER_REGNUM:
21478 return offsets->outgoing_args - offsets->saved_args;
21480 case FRAME_POINTER_REGNUM:
21481 return offsets->soft_frame - offsets->saved_args;
21483 case ARM_HARD_FRAME_POINTER_REGNUM:
21484 return offsets->saved_regs - offsets->saved_args;
21486 case THUMB_HARD_FRAME_POINTER_REGNUM:
21487 return offsets->locals_base - offsets->saved_args;
21490 gcc_unreachable ();
21494 case FRAME_POINTER_REGNUM:
21497 case STACK_POINTER_REGNUM:
21498 return offsets->outgoing_args - offsets->soft_frame;
21500 case ARM_HARD_FRAME_POINTER_REGNUM:
21501 return offsets->saved_regs - offsets->soft_frame;
21503 case THUMB_HARD_FRAME_POINTER_REGNUM:
21504 return offsets->locals_base - offsets->soft_frame;
21507 gcc_unreachable ();
21512 gcc_unreachable ();
21516 /* Generate the function's prologue. */
21519 thumb1_expand_prologue (void)
21523 HOST_WIDE_INT amount;
21524 arm_stack_offsets *offsets;
21525 unsigned long func_type;
21527 unsigned long live_regs_mask;
21528 unsigned long l_mask;
21529 unsigned high_regs_pushed = 0;
21531 func_type = arm_current_func_type ();
21533 /* Naked functions don't have prologues. */
21534 if (IS_NAKED (func_type))
21537 if (IS_INTERRUPT (func_type))
21539 error ("interrupt Service Routines cannot be coded in Thumb mode");
21543 if (is_called_in_ARM_mode (current_function_decl))
21544 emit_insn (gen_prologue_thumb1_interwork ());
21546 offsets = arm_get_frame_offsets ();
21547 live_regs_mask = offsets->saved_regs_mask;
21549 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21550 l_mask = live_regs_mask & 0x40ff;
21551 /* Then count how many other high registers will need to be pushed. */
21552 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21554 if (crtl->args.pretend_args_size)
21556 rtx x = GEN_INT (-crtl->args.pretend_args_size);
21558 if (cfun->machine->uses_anonymous_args)
21560 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
21561 unsigned long mask;
21563 mask = 1ul << (LAST_ARG_REGNUM + 1);
21564 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
21566 insn = thumb1_emit_multi_reg_push (mask, 0);
21570 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21571 stack_pointer_rtx, x));
21573 RTX_FRAME_RELATED_P (insn) = 1;
21576 if (TARGET_BACKTRACE)
21578 HOST_WIDE_INT offset = 0;
21579 unsigned work_register;
21580 rtx work_reg, x, arm_hfp_rtx;
21582 /* We have been asked to create a stack backtrace structure.
21583 The code looks like this:
21587 0 sub SP, #16 Reserve space for 4 registers.
21588 2 push {R7} Push low registers.
21589 4 add R7, SP, #20 Get the stack pointer before the push.
21590 6 str R7, [SP, #8] Store the stack pointer
21591 (before reserving the space).
21592 8 mov R7, PC Get hold of the start of this code + 12.
21593 10 str R7, [SP, #16] Store it.
21594 12 mov R7, FP Get hold of the current frame pointer.
21595 14 str R7, [SP, #4] Store it.
21596 16 mov R7, LR Get hold of the current return address.
21597 18 str R7, [SP, #12] Store it.
21598 20 add R7, SP, #16 Point at the start of the
21599 backtrace structure.
21600 22 mov FP, R7 Put this value into the frame pointer. */
21602 work_register = thumb_find_work_register (live_regs_mask);
21603 work_reg = gen_rtx_REG (SImode, work_register);
21604 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
21606 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21607 stack_pointer_rtx, GEN_INT (-16)));
21608 RTX_FRAME_RELATED_P (insn) = 1;
21612 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
21613 RTX_FRAME_RELATED_P (insn) = 1;
21615 offset = bit_count (l_mask) * UNITS_PER_WORD;
21618 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
21619 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
21621 x = plus_constant (stack_pointer_rtx, offset + 4);
21622 x = gen_frame_mem (SImode, x);
21623 emit_move_insn (x, work_reg);
21625 /* Make sure that the instruction fetching the PC is in the right place
21626 to calculate "start of backtrace creation code + 12". */
21627 /* ??? The stores using the common WORK_REG ought to be enough to
21628 prevent the scheduler from doing anything weird. Failing that
21629 we could always move all of the following into an UNSPEC_VOLATILE. */
21632 x = gen_rtx_REG (SImode, PC_REGNUM);
21633 emit_move_insn (work_reg, x);
21635 x = plus_constant (stack_pointer_rtx, offset + 12);
21636 x = gen_frame_mem (SImode, x);
21637 emit_move_insn (x, work_reg);
21639 emit_move_insn (work_reg, arm_hfp_rtx);
21641 x = plus_constant (stack_pointer_rtx, offset);
21642 x = gen_frame_mem (SImode, x);
21643 emit_move_insn (x, work_reg);
21647 emit_move_insn (work_reg, arm_hfp_rtx);
21649 x = plus_constant (stack_pointer_rtx, offset);
21650 x = gen_frame_mem (SImode, x);
21651 emit_move_insn (x, work_reg);
21653 x = gen_rtx_REG (SImode, PC_REGNUM);
21654 emit_move_insn (work_reg, x);
21656 x = plus_constant (stack_pointer_rtx, offset + 12);
21657 x = gen_frame_mem (SImode, x);
21658 emit_move_insn (x, work_reg);
21661 x = gen_rtx_REG (SImode, LR_REGNUM);
21662 emit_move_insn (work_reg, x);
21664 x = plus_constant (stack_pointer_rtx, offset + 8);
21665 x = gen_frame_mem (SImode, x);
21666 emit_move_insn (x, work_reg);
21668 x = GEN_INT (offset + 12);
21669 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
21671 emit_move_insn (arm_hfp_rtx, work_reg);
21673 /* Optimization: If we are not pushing any low registers but we are going
21674 to push some high registers then delay our first push. This will just
21675 be a push of LR and we can combine it with the push of the first high
21677 else if ((l_mask & 0xff) != 0
21678 || (high_regs_pushed == 0 && l_mask))
21680 unsigned long mask = l_mask;
21681 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21682 insn = thumb1_emit_multi_reg_push (mask, mask);
21683 RTX_FRAME_RELATED_P (insn) = 1;
21686 if (high_regs_pushed)
21688 unsigned pushable_regs;
21689 unsigned next_hi_reg;
21691 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21692 if (live_regs_mask & (1 << next_hi_reg))
21695 pushable_regs = l_mask & 0xff;
21697 if (pushable_regs == 0)
21698 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21700 while (high_regs_pushed > 0)
21702 unsigned long real_regs_mask = 0;
21704 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21706 if (pushable_regs & (1 << regno))
21708 emit_move_insn (gen_rtx_REG (SImode, regno),
21709 gen_rtx_REG (SImode, next_hi_reg));
21711 high_regs_pushed --;
21712 real_regs_mask |= (1 << next_hi_reg);
21714 if (high_regs_pushed)
21716 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21718 if (live_regs_mask & (1 << next_hi_reg))
21723 pushable_regs &= ~((1 << regno) - 1);
21729 /* If we had to find a work register and we have not yet
21730 saved the LR then add it to the list of regs to push. */
21731 if (l_mask == (1 << LR_REGNUM))
21733 pushable_regs |= l_mask;
21734 real_regs_mask |= l_mask;
21738 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
21739 RTX_FRAME_RELATED_P (insn) = 1;
21743 /* Load the pic register before setting the frame pointer,
21744 so we can use r7 as a temporary work register. */
21745 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21746 arm_load_pic_register (live_regs_mask);
21748 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21749 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
21750 stack_pointer_rtx);
21752 if (flag_stack_usage_info)
21753 current_function_static_stack_size
21754 = offsets->outgoing_args - offsets->saved_args;
21756 amount = offsets->outgoing_args - offsets->saved_regs;
21757 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
21762 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21763 GEN_INT (- amount)));
21764 RTX_FRAME_RELATED_P (insn) = 1;
21770 /* The stack decrement is too big for an immediate value in a single
21771 insn. In theory we could issue multiple subtracts, but after
21772 three of them it becomes more space efficient to place the full
21773 value in the constant pool and load into a register. (Also the
21774 ARM debugger really likes to see only one stack decrement per
21775 function). So instead we look for a scratch register into which
21776 we can load the decrement, and then we subtract this from the
21777 stack pointer. Unfortunately on the thumb the only available
21778 scratch registers are the argument registers, and we cannot use
21779 these as they may hold arguments to the function. Instead we
21780 attempt to locate a call preserved register which is used by this
21781 function. If we can find one, then we know that it will have
21782 been pushed at the start of the prologue and so we can corrupt
21784 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
21785 if (live_regs_mask & (1 << regno))
21788 gcc_assert(regno <= LAST_LO_REGNUM);
21790 reg = gen_rtx_REG (SImode, regno);
21792 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
21794 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
21795 stack_pointer_rtx, reg));
21797 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21798 plus_constant (stack_pointer_rtx,
21800 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21801 RTX_FRAME_RELATED_P (insn) = 1;
21805 if (frame_pointer_needed)
21806 thumb_set_frame_pointer (offsets);
21808 /* If we are profiling, make sure no instructions are scheduled before
21809 the call to mcount. Similarly if the user has requested no
21810 scheduling in the prolog. Similarly if we want non-call exceptions
21811 using the EABI unwinder, to prevent faulting instructions from being
21812 swapped with a stack adjustment. */
21813 if (crtl->profile || !TARGET_SCHED_PROLOG
21814 || (arm_except_unwind_info (&global_options) == UI_TARGET
21815 && cfun->can_throw_non_call_exceptions))
21816 emit_insn (gen_blockage ());
21818 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
21819 if (live_regs_mask & 0xff)
21820 cfun->machine->lr_save_eliminated = 0;
21825 thumb1_expand_epilogue (void)
21827 HOST_WIDE_INT amount;
21828 arm_stack_offsets *offsets;
21831 /* Naked functions don't have prologues. */
21832 if (IS_NAKED (arm_current_func_type ()))
21835 offsets = arm_get_frame_offsets ();
21836 amount = offsets->outgoing_args - offsets->saved_regs;
21838 if (frame_pointer_needed)
21840 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
21841 amount = offsets->locals_base - offsets->saved_regs;
21843 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
21845 gcc_assert (amount >= 0);
21849 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21850 GEN_INT (amount)));
21853 /* r3 is always free in the epilogue. */
21854 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
21856 emit_insn (gen_movsi (reg, GEN_INT (amount)));
21857 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
21861 /* Emit a USE (stack_pointer_rtx), so that
21862 the stack adjustment will not be deleted. */
21863 emit_insn (gen_prologue_use (stack_pointer_rtx));
21865 if (crtl->profile || !TARGET_SCHED_PROLOG)
21866 emit_insn (gen_blockage ());
21868 /* Emit a clobber for each insn that will be restored in the epilogue,
21869 so that flow2 will get register lifetimes correct. */
21870 for (regno = 0; regno < 13; regno++)
21871 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
21872 emit_clobber (gen_rtx_REG (SImode, regno));
21874 if (! df_regs_ever_live_p (LR_REGNUM))
21875 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
21878 /* Implementation of insn prologue_thumb1_interwork. This is the first
21879 "instruction" of a function called in ARM mode. Swap to thumb mode. */
21882 thumb1_output_interwork (void)
21885 FILE *f = asm_out_file;
21887 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
21888 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
21890 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
21892 /* Generate code sequence to switch us into Thumb mode. */
21893 /* The .code 32 directive has already been emitted by
21894 ASM_DECLARE_FUNCTION_NAME. */
21895 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
21896 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
21898 /* Generate a label, so that the debugger will notice the
21899 change in instruction sets. This label is also used by
21900 the assembler to bypass the ARM code when this function
21901 is called from a Thumb encoded function elsewhere in the
21902 same file. Hence the definition of STUB_NAME here must
21903 agree with the definition in gas/config/tc-arm.c. */
21905 #define STUB_NAME ".real_start_of"
21907 fprintf (f, "\t.code\t16\n");
21909 if (arm_dllexport_name_p (name))
21910 name = arm_strip_name_encoding (name);
21912 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
21913 fprintf (f, "\t.thumb_func\n");
21914 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
21919 /* Handle the case of a double word load into a low register from
21920 a computed memory address. The computed address may involve a
21921 register which is overwritten by the load. */
21923 thumb_load_double_from_address (rtx *operands)
21931 gcc_assert (GET_CODE (operands[0]) == REG);
21932 gcc_assert (GET_CODE (operands[1]) == MEM);
21934 /* Get the memory address. */
21935 addr = XEXP (operands[1], 0);
21937 /* Work out how the memory address is computed. */
21938 switch (GET_CODE (addr))
21941 operands[2] = adjust_address (operands[1], SImode, 4);
21943 if (REGNO (operands[0]) == REGNO (addr))
21945 output_asm_insn ("ldr\t%H0, %2", operands);
21946 output_asm_insn ("ldr\t%0, %1", operands);
21950 output_asm_insn ("ldr\t%0, %1", operands);
21951 output_asm_insn ("ldr\t%H0, %2", operands);
21956 /* Compute <address> + 4 for the high order load. */
21957 operands[2] = adjust_address (operands[1], SImode, 4);
21959 output_asm_insn ("ldr\t%0, %1", operands);
21960 output_asm_insn ("ldr\t%H0, %2", operands);
21964 arg1 = XEXP (addr, 0);
21965 arg2 = XEXP (addr, 1);
21967 if (CONSTANT_P (arg1))
21968 base = arg2, offset = arg1;
21970 base = arg1, offset = arg2;
21972 gcc_assert (GET_CODE (base) == REG);
21974 /* Catch the case of <address> = <reg> + <reg> */
21975 if (GET_CODE (offset) == REG)
21977 int reg_offset = REGNO (offset);
21978 int reg_base = REGNO (base);
21979 int reg_dest = REGNO (operands[0]);
21981 /* Add the base and offset registers together into the
21982 higher destination register. */
21983 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21984 reg_dest + 1, reg_base, reg_offset);
21986 /* Load the lower destination register from the address in
21987 the higher destination register. */
21988 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21989 reg_dest, reg_dest + 1);
21991 /* Load the higher destination register from its own address
21993 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21994 reg_dest + 1, reg_dest + 1);
21998 /* Compute <address> + 4 for the high order load. */
21999 operands[2] = adjust_address (operands[1], SImode, 4);
22001 /* If the computed address is held in the low order register
22002 then load the high order register first, otherwise always
22003 load the low order register first. */
22004 if (REGNO (operands[0]) == REGNO (base))
22006 output_asm_insn ("ldr\t%H0, %2", operands);
22007 output_asm_insn ("ldr\t%0, %1", operands);
22011 output_asm_insn ("ldr\t%0, %1", operands);
22012 output_asm_insn ("ldr\t%H0, %2", operands);
22018 /* With no registers to worry about we can just load the value
22020 operands[2] = adjust_address (operands[1], SImode, 4);
22022 output_asm_insn ("ldr\t%H0, %2", operands);
22023 output_asm_insn ("ldr\t%0, %1", operands);
22027 gcc_unreachable ();
22034 thumb_output_move_mem_multiple (int n, rtx *operands)
22041 if (REGNO (operands[4]) > REGNO (operands[5]))
22044 operands[4] = operands[5];
22047 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22048 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22052 if (REGNO (operands[4]) > REGNO (operands[5]))
22055 operands[4] = operands[5];
22058 if (REGNO (operands[5]) > REGNO (operands[6]))
22061 operands[5] = operands[6];
22064 if (REGNO (operands[4]) > REGNO (operands[5]))
22067 operands[4] = operands[5];
22071 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22072 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22076 gcc_unreachable ();
22082 /* Output a call-via instruction for thumb state. */
22084 thumb_call_via_reg (rtx reg)
22086 int regno = REGNO (reg);
22089 gcc_assert (regno < LR_REGNUM);
22091 /* If we are in the normal text section we can use a single instance
22092 per compilation unit. If we are doing function sections, then we need
22093 an entry per section, since we can't rely on reachability. */
22094 if (in_section == text_section)
22096 thumb_call_reg_needed = 1;
22098 if (thumb_call_via_label[regno] == NULL)
22099 thumb_call_via_label[regno] = gen_label_rtx ();
22100 labelp = thumb_call_via_label + regno;
22104 if (cfun->machine->call_via[regno] == NULL)
22105 cfun->machine->call_via[regno] = gen_label_rtx ();
22106 labelp = cfun->machine->call_via + regno;
22109 output_asm_insn ("bl\t%a0", labelp);
22113 /* Routines for generating rtl. */
22115 thumb_expand_movmemqi (rtx *operands)
22117 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22118 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22119 HOST_WIDE_INT len = INTVAL (operands[2]);
22120 HOST_WIDE_INT offset = 0;
22124 emit_insn (gen_movmem12b (out, in, out, in));
22130 emit_insn (gen_movmem8b (out, in, out, in));
22136 rtx reg = gen_reg_rtx (SImode);
22137 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22138 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22145 rtx reg = gen_reg_rtx (HImode);
22146 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22147 plus_constant (in, offset))));
22148 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22156 rtx reg = gen_reg_rtx (QImode);
22157 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22158 plus_constant (in, offset))));
22159 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
22165 thumb_reload_out_hi (rtx *operands)
22167 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22170 /* Handle reading a half-word from memory during reload. */
22172 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22174 gcc_unreachable ();
22177 /* Return the length of a function name prefix
22178 that starts with the character 'c'. */
22180 arm_get_strip_length (int c)
22184 ARM_NAME_ENCODING_LENGTHS
22189 /* Return a pointer to a function's name with any
22190 and all prefix encodings stripped from it. */
22192 arm_strip_name_encoding (const char *name)
22196 while ((skip = arm_get_strip_length (* name)))
22202 /* If there is a '*' anywhere in the name's prefix, then
22203 emit the stripped name verbatim, otherwise prepend an
22204 underscore if leading underscores are being used. */
22206 arm_asm_output_labelref (FILE *stream, const char *name)
22211 while ((skip = arm_get_strip_length (* name)))
22213 verbatim |= (*name == '*');
22218 fputs (name, stream);
22220 asm_fprintf (stream, "%U%s", name);
22224 arm_file_start (void)
22228 if (TARGET_UNIFIED_ASM)
22229 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22233 const char *fpu_name;
22234 if (arm_selected_arch)
22235 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22236 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22237 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22239 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22241 if (TARGET_SOFT_FLOAT)
22244 fpu_name = "softvfp";
22246 fpu_name = "softfpa";
22250 fpu_name = arm_fpu_desc->name;
22251 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22253 if (TARGET_HARD_FLOAT)
22254 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
22255 if (TARGET_HARD_FLOAT_ABI)
22256 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
22259 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22261 /* Some of these attributes only apply when the corresponding features
22262 are used. However we don't have any easy way of figuring this out.
22263 Conservatively record the setting that would have been used. */
22265 /* Tag_ABI_FP_rounding. */
22266 if (flag_rounding_math)
22267 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
22268 if (!flag_unsafe_math_optimizations)
22270 /* Tag_ABI_FP_denomal. */
22271 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
22272 /* Tag_ABI_FP_exceptions. */
22273 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
22275 /* Tag_ABI_FP_user_exceptions. */
22276 if (flag_signaling_nans)
22277 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
22278 /* Tag_ABI_FP_number_model. */
22279 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
22280 flag_finite_math_only ? 1 : 3);
22282 /* Tag_ABI_align8_needed. */
22283 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
22284 /* Tag_ABI_align8_preserved. */
22285 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
22286 /* Tag_ABI_enum_size. */
22287 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
22288 flag_short_enums ? 1 : 2);
22290 /* Tag_ABI_optimization_goals. */
22293 else if (optimize >= 2)
22299 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
22301 /* Tag_CPU_unaligned_access. */
22302 asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
22305 /* Tag_ABI_FP_16bit_format. */
22306 if (arm_fp16_format)
22307 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
22308 (int)arm_fp16_format);
22310 if (arm_lang_output_object_attributes_hook)
22311 arm_lang_output_object_attributes_hook();
22313 default_file_start();
22317 arm_file_end (void)
22321 if (NEED_INDICATE_EXEC_STACK)
22322 /* Add .note.GNU-stack. */
22323 file_end_indicate_exec_stack ();
22325 if (! thumb_call_reg_needed)
22328 switch_to_section (text_section);
22329 asm_fprintf (asm_out_file, "\t.code 16\n");
22330 ASM_OUTPUT_ALIGN (asm_out_file, 1);
22332 for (regno = 0; regno < LR_REGNUM; regno++)
22334 rtx label = thumb_call_via_label[regno];
22338 targetm.asm_out.internal_label (asm_out_file, "L",
22339 CODE_LABEL_NUMBER (label));
22340 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
22346 /* Symbols in the text segment can be accessed without indirecting via the
22347 constant pool; it may take an extra binary operation, but this is still
22348 faster than indirecting via memory. Don't do this when not optimizing,
22349 since we won't be calculating al of the offsets necessary to do this
22353 arm_encode_section_info (tree decl, rtx rtl, int first)
22355 if (optimize > 0 && TREE_CONSTANT (decl))
22356 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
22358 default_encode_section_info (decl, rtl, first);
22360 #endif /* !ARM_PE */
22363 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
22365 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
22366 && !strcmp (prefix, "L"))
22368 arm_ccfsm_state = 0;
22369 arm_target_insn = NULL;
22371 default_internal_label (stream, prefix, labelno);
22374 /* Output code to add DELTA to the first argument, and then jump
22375 to FUNCTION. Used for C++ multiple inheritance. */
22377 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
22378 HOST_WIDE_INT delta,
22379 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
22382 static int thunk_label = 0;
22385 int mi_delta = delta;
22386 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
22388 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
22391 mi_delta = - mi_delta;
22395 int labelno = thunk_label++;
22396 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
22397 /* Thunks are entered in arm mode when avaiable. */
22398 if (TARGET_THUMB1_ONLY)
22400 /* push r3 so we can use it as a temporary. */
22401 /* TODO: Omit this save if r3 is not used. */
22402 fputs ("\tpush {r3}\n", file);
22403 fputs ("\tldr\tr3, ", file);
22407 fputs ("\tldr\tr12, ", file);
22409 assemble_name (file, label);
22410 fputc ('\n', file);
22413 /* If we are generating PIC, the ldr instruction below loads
22414 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22415 the address of the add + 8, so we have:
22417 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
22420 Note that we have "+ 1" because some versions of GNU ld
22421 don't set the low bit of the result for R_ARM_REL32
22422 relocations against thumb function symbols.
22423 On ARMv6M this is +4, not +8. */
22424 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
22425 assemble_name (file, labelpc);
22426 fputs (":\n", file);
22427 if (TARGET_THUMB1_ONLY)
22429 /* This is 2 insns after the start of the thunk, so we know it
22430 is 4-byte aligned. */
22431 fputs ("\tadd\tr3, pc, r3\n", file);
22432 fputs ("\tmov r12, r3\n", file);
22435 fputs ("\tadd\tr12, pc, r12\n", file);
22437 else if (TARGET_THUMB1_ONLY)
22438 fputs ("\tmov r12, r3\n", file);
22440 if (TARGET_THUMB1_ONLY)
22442 if (mi_delta > 255)
22444 fputs ("\tldr\tr3, ", file);
22445 assemble_name (file, label);
22446 fputs ("+4\n", file);
22447 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
22448 mi_op, this_regno, this_regno);
22450 else if (mi_delta != 0)
22452 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22453 mi_op, this_regno, this_regno,
22459 /* TODO: Use movw/movt for large constants when available. */
22460 while (mi_delta != 0)
22462 if ((mi_delta & (3 << shift)) == 0)
22466 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
22467 mi_op, this_regno, this_regno,
22468 mi_delta & (0xff << shift));
22469 mi_delta &= ~(0xff << shift);
22476 if (TARGET_THUMB1_ONLY)
22477 fputs ("\tpop\t{r3}\n", file);
22479 fprintf (file, "\tbx\tr12\n");
22480 ASM_OUTPUT_ALIGN (file, 2);
22481 assemble_name (file, label);
22482 fputs (":\n", file);
22485 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
22486 rtx tem = XEXP (DECL_RTL (function), 0);
22487 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
22488 tem = gen_rtx_MINUS (GET_MODE (tem),
22490 gen_rtx_SYMBOL_REF (Pmode,
22491 ggc_strdup (labelpc)));
22492 assemble_integer (tem, 4, BITS_PER_WORD, 1);
22495 /* Output ".word .LTHUNKn". */
22496 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
22498 if (TARGET_THUMB1_ONLY && mi_delta > 255)
22499 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
22503 fputs ("\tb\t", file);
22504 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
22505 if (NEED_PLT_RELOC)
22506 fputs ("(PLT)", file);
22507 fputc ('\n', file);
22512 arm_emit_vector_const (FILE *file, rtx x)
22515 const char * pattern;
22517 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22519 switch (GET_MODE (x))
22521 case V2SImode: pattern = "%08x"; break;
22522 case V4HImode: pattern = "%04x"; break;
22523 case V8QImode: pattern = "%02x"; break;
22524 default: gcc_unreachable ();
22527 fprintf (file, "0x");
22528 for (i = CONST_VECTOR_NUNITS (x); i--;)
22532 element = CONST_VECTOR_ELT (x, i);
22533 fprintf (file, pattern, INTVAL (element));
22539 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
22540 HFmode constant pool entries are actually loaded with ldr. */
22542 arm_emit_fp16_const (rtx c)
22547 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
22548 bits = real_to_target (NULL, &r, HFmode);
22549 if (WORDS_BIG_ENDIAN)
22550 assemble_zeros (2);
22551 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
22552 if (!WORDS_BIG_ENDIAN)
22553 assemble_zeros (2);
22557 arm_output_load_gr (rtx *operands)
22564 if (GET_CODE (operands [1]) != MEM
22565 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
22566 || GET_CODE (reg = XEXP (sum, 0)) != REG
22567 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
22568 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
22569 return "wldrw%?\t%0, %1";
22571 /* Fix up an out-of-range load of a GR register. */
22572 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
22573 wcgr = operands[0];
22575 output_asm_insn ("ldr%?\t%0, %1", operands);
22577 operands[0] = wcgr;
22579 output_asm_insn ("tmcr%?\t%0, %1", operands);
22580 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
22585 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
22587 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
22588 named arg and all anonymous args onto the stack.
22589 XXX I know the prologue shouldn't be pushing registers, but it is faster
22593 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
22594 enum machine_mode mode,
22597 int second_time ATTRIBUTE_UNUSED)
22599 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
22602 cfun->machine->uses_anonymous_args = 1;
22603 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
22605 nregs = pcum->aapcs_ncrn;
22606 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
22610 nregs = pcum->nregs;
22612 if (nregs < NUM_ARG_REGS)
22613 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
22616 /* Return nonzero if the CONSUMER instruction (a store) does not need
22617 PRODUCER's value to calculate the address. */
22620 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
22622 rtx value = PATTERN (producer);
22623 rtx addr = PATTERN (consumer);
22625 if (GET_CODE (value) == COND_EXEC)
22626 value = COND_EXEC_CODE (value);
22627 if (GET_CODE (value) == PARALLEL)
22628 value = XVECEXP (value, 0, 0);
22629 value = XEXP (value, 0);
22630 if (GET_CODE (addr) == COND_EXEC)
22631 addr = COND_EXEC_CODE (addr);
22632 if (GET_CODE (addr) == PARALLEL)
22633 addr = XVECEXP (addr, 0, 0);
22634 addr = XEXP (addr, 0);
22636 return !reg_overlap_mentioned_p (value, addr);
22639 /* Return nonzero if the CONSUMER instruction (a store) does need
22640 PRODUCER's value to calculate the address. */
22643 arm_early_store_addr_dep (rtx producer, rtx consumer)
22645 return !arm_no_early_store_addr_dep (producer, consumer);
22648 /* Return nonzero if the CONSUMER instruction (a load) does need
22649 PRODUCER's value to calculate the address. */
22652 arm_early_load_addr_dep (rtx producer, rtx consumer)
22654 rtx value = PATTERN (producer);
22655 rtx addr = PATTERN (consumer);
22657 if (GET_CODE (value) == COND_EXEC)
22658 value = COND_EXEC_CODE (value);
22659 if (GET_CODE (value) == PARALLEL)
22660 value = XVECEXP (value, 0, 0);
22661 value = XEXP (value, 0);
22662 if (GET_CODE (addr) == COND_EXEC)
22663 addr = COND_EXEC_CODE (addr);
22664 if (GET_CODE (addr) == PARALLEL)
22665 addr = XVECEXP (addr, 0, 0);
22666 addr = XEXP (addr, 1);
22668 return reg_overlap_mentioned_p (value, addr);
22671 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22672 have an early register shift value or amount dependency on the
22673 result of PRODUCER. */
22676 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
22678 rtx value = PATTERN (producer);
22679 rtx op = PATTERN (consumer);
22682 if (GET_CODE (value) == COND_EXEC)
22683 value = COND_EXEC_CODE (value);
22684 if (GET_CODE (value) == PARALLEL)
22685 value = XVECEXP (value, 0, 0);
22686 value = XEXP (value, 0);
22687 if (GET_CODE (op) == COND_EXEC)
22688 op = COND_EXEC_CODE (op);
22689 if (GET_CODE (op) == PARALLEL)
22690 op = XVECEXP (op, 0, 0);
22693 early_op = XEXP (op, 0);
22694 /* This is either an actual independent shift, or a shift applied to
22695 the first operand of another operation. We want the whole shift
22697 if (GET_CODE (early_op) == REG)
22700 return !reg_overlap_mentioned_p (value, early_op);
22703 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22704 have an early register shift value dependency on the result of
22708 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
22710 rtx value = PATTERN (producer);
22711 rtx op = PATTERN (consumer);
22714 if (GET_CODE (value) == COND_EXEC)
22715 value = COND_EXEC_CODE (value);
22716 if (GET_CODE (value) == PARALLEL)
22717 value = XVECEXP (value, 0, 0);
22718 value = XEXP (value, 0);
22719 if (GET_CODE (op) == COND_EXEC)
22720 op = COND_EXEC_CODE (op);
22721 if (GET_CODE (op) == PARALLEL)
22722 op = XVECEXP (op, 0, 0);
22725 early_op = XEXP (op, 0);
22727 /* This is either an actual independent shift, or a shift applied to
22728 the first operand of another operation. We want the value being
22729 shifted, in either case. */
22730 if (GET_CODE (early_op) != REG)
22731 early_op = XEXP (early_op, 0);
22733 return !reg_overlap_mentioned_p (value, early_op);
22736 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22737 have an early register mult dependency on the result of
22741 arm_no_early_mul_dep (rtx producer, rtx consumer)
22743 rtx value = PATTERN (producer);
22744 rtx op = PATTERN (consumer);
22746 if (GET_CODE (value) == COND_EXEC)
22747 value = COND_EXEC_CODE (value);
22748 if (GET_CODE (value) == PARALLEL)
22749 value = XVECEXP (value, 0, 0);
22750 value = XEXP (value, 0);
22751 if (GET_CODE (op) == COND_EXEC)
22752 op = COND_EXEC_CODE (op);
22753 if (GET_CODE (op) == PARALLEL)
22754 op = XVECEXP (op, 0, 0);
22757 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22759 if (GET_CODE (XEXP (op, 0)) == MULT)
22760 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22762 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22768 /* We can't rely on the caller doing the proper promotion when
22769 using APCS or ATPCS. */
22772 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22774 return !TARGET_AAPCS_BASED;
22777 static enum machine_mode
22778 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22779 enum machine_mode mode,
22780 int *punsignedp ATTRIBUTE_UNUSED,
22781 const_tree fntype ATTRIBUTE_UNUSED,
22782 int for_return ATTRIBUTE_UNUSED)
22784 if (GET_MODE_CLASS (mode) == MODE_INT
22785 && GET_MODE_SIZE (mode) < 4)
22791 /* AAPCS based ABIs use short enums by default. */
22794 arm_default_short_enums (void)
22796 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22800 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22803 arm_align_anon_bitfield (void)
22805 return TARGET_AAPCS_BASED;
22809 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22812 arm_cxx_guard_type (void)
22814 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22817 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22818 has an accumulator dependency on the result of the producer (a
22819 multiplication instruction) and no other dependency on that result. */
22821 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22823 rtx mul = PATTERN (producer);
22824 rtx mac = PATTERN (consumer);
22826 rtx mac_op0, mac_op1, mac_acc;
22828 if (GET_CODE (mul) == COND_EXEC)
22829 mul = COND_EXEC_CODE (mul);
22830 if (GET_CODE (mac) == COND_EXEC)
22831 mac = COND_EXEC_CODE (mac);
22833 /* Check that mul is of the form (set (...) (mult ...))
22834 and mla is of the form (set (...) (plus (mult ...) (...))). */
22835 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22836 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22837 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22840 mul_result = XEXP (mul, 0);
22841 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22842 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22843 mac_acc = XEXP (XEXP (mac, 1), 1);
22845 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22846 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22847 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22851 /* The EABI says test the least significant bit of a guard variable. */
22854 arm_cxx_guard_mask_bit (void)
22856 return TARGET_AAPCS_BASED;
22860 /* The EABI specifies that all array cookies are 8 bytes long. */
22863 arm_get_cookie_size (tree type)
22867 if (!TARGET_AAPCS_BASED)
22868 return default_cxx_get_cookie_size (type);
22870 size = build_int_cst (sizetype, 8);
22875 /* The EABI says that array cookies should also contain the element size. */
22878 arm_cookie_has_size (void)
22880 return TARGET_AAPCS_BASED;
22884 /* The EABI says constructors and destructors should return a pointer to
22885 the object constructed/destroyed. */
22888 arm_cxx_cdtor_returns_this (void)
22890 return TARGET_AAPCS_BASED;
22893 /* The EABI says that an inline function may never be the key
22897 arm_cxx_key_method_may_be_inline (void)
22899 return !TARGET_AAPCS_BASED;
22903 arm_cxx_determine_class_data_visibility (tree decl)
22905 if (!TARGET_AAPCS_BASED
22906 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22909 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22910 is exported. However, on systems without dynamic vague linkage,
22911 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22912 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22913 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22915 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22916 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22920 arm_cxx_class_data_always_comdat (void)
22922 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22923 vague linkage if the class has no key function. */
22924 return !TARGET_AAPCS_BASED;
22928 /* The EABI says __aeabi_atexit should be used to register static
22932 arm_cxx_use_aeabi_atexit (void)
22934 return TARGET_AAPCS_BASED;
22939 arm_set_return_address (rtx source, rtx scratch)
22941 arm_stack_offsets *offsets;
22942 HOST_WIDE_INT delta;
22944 unsigned long saved_regs;
22946 offsets = arm_get_frame_offsets ();
22947 saved_regs = offsets->saved_regs_mask;
22949 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22950 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22953 if (frame_pointer_needed)
22954 addr = plus_constant(hard_frame_pointer_rtx, -4);
22957 /* LR will be the first saved register. */
22958 delta = offsets->outgoing_args - (offsets->frame + 4);
22963 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22964 GEN_INT (delta & ~4095)));
22969 addr = stack_pointer_rtx;
22971 addr = plus_constant (addr, delta);
22973 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22979 thumb_set_return_address (rtx source, rtx scratch)
22981 arm_stack_offsets *offsets;
22982 HOST_WIDE_INT delta;
22983 HOST_WIDE_INT limit;
22986 unsigned long mask;
22990 offsets = arm_get_frame_offsets ();
22991 mask = offsets->saved_regs_mask;
22992 if (mask & (1 << LR_REGNUM))
22995 /* Find the saved regs. */
22996 if (frame_pointer_needed)
22998 delta = offsets->soft_frame - offsets->saved_args;
22999 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23005 delta = offsets->outgoing_args - offsets->saved_args;
23008 /* Allow for the stack frame. */
23009 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23011 /* The link register is always the first saved register. */
23014 /* Construct the address. */
23015 addr = gen_rtx_REG (SImode, reg);
23018 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23019 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23023 addr = plus_constant (addr, delta);
23025 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23028 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23031 /* Implements target hook vector_mode_supported_p. */
23033 arm_vector_mode_supported_p (enum machine_mode mode)
23035 /* Neon also supports V2SImode, etc. listed in the clause below. */
23036 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23037 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23040 if ((TARGET_NEON || TARGET_IWMMXT)
23041 && ((mode == V2SImode)
23042 || (mode == V4HImode)
23043 || (mode == V8QImode)))
23046 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23047 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23048 || mode == V2HAmode))
23054 /* Implements target hook array_mode_supported_p. */
23057 arm_array_mode_supported_p (enum machine_mode mode,
23058 unsigned HOST_WIDE_INT nelems)
23061 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23062 && (nelems >= 2 && nelems <= 4))
23068 /* Use the option -mvectorize-with-neon-double to override the use of quardword
23069 registers when autovectorizing for Neon, at least until multiple vector
23070 widths are supported properly by the middle-end. */
23072 static enum machine_mode
23073 arm_preferred_simd_mode (enum machine_mode mode)
23079 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23081 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23083 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23085 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23087 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23094 if (TARGET_REALLY_IWMMXT)
23110 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23112 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23113 using r0-r4 for function arguments, r7 for the stack frame and don't have
23114 enough left over to do doubleword arithmetic. For Thumb-2 all the
23115 potentially problematic instructions accept high registers so this is not
23116 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23117 that require many low registers. */
23119 arm_class_likely_spilled_p (reg_class_t rclass)
23121 if ((TARGET_THUMB1 && rclass == LO_REGS)
23122 || rclass == CC_REG)
23128 /* Implements target hook small_register_classes_for_mode_p. */
23130 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23132 return TARGET_THUMB1;
23135 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23136 ARM insns and therefore guarantee that the shift count is modulo 256.
23137 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
23138 guarantee no particular behavior for out-of-range counts. */
23140 static unsigned HOST_WIDE_INT
23141 arm_shift_truncation_mask (enum machine_mode mode)
23143 return mode == SImode ? 255 : 0;
23147 /* Map internal gcc register numbers to DWARF2 register numbers. */
23150 arm_dbx_register_number (unsigned int regno)
23155 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23156 compatibility. The EABI defines them as registers 96-103. */
23157 if (IS_FPA_REGNUM (regno))
23158 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23160 if (IS_VFP_REGNUM (regno))
23162 /* See comment in arm_dwarf_register_span. */
23163 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23164 return 64 + regno - FIRST_VFP_REGNUM;
23166 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23169 if (IS_IWMMXT_GR_REGNUM (regno))
23170 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23172 if (IS_IWMMXT_REGNUM (regno))
23173 return 112 + regno - FIRST_IWMMXT_REGNUM;
23175 gcc_unreachable ();
23178 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23179 GCC models tham as 64 32-bit registers, so we need to describe this to
23180 the DWARF generation code. Other registers can use the default. */
23182 arm_dwarf_register_span (rtx rtl)
23189 regno = REGNO (rtl);
23190 if (!IS_VFP_REGNUM (regno))
23193 /* XXX FIXME: The EABI defines two VFP register ranges:
23194 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23196 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23197 corresponding D register. Until GDB supports this, we shall use the
23198 legacy encodings. We also use these encodings for D0-D15 for
23199 compatibility with older debuggers. */
23200 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23203 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23204 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23205 regno = (regno - FIRST_VFP_REGNUM) / 2;
23206 for (i = 0; i < nregs; i++)
23207 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23212 #if ARM_UNWIND_INFO
23213 /* Emit unwind directives for a store-multiple instruction or stack pointer
23214 push during alignment.
23215 These should only ever be generated by the function prologue code, so
23216 expect them to have a particular form. */
23219 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23222 HOST_WIDE_INT offset;
23223 HOST_WIDE_INT nregs;
23229 e = XVECEXP (p, 0, 0);
23230 if (GET_CODE (e) != SET)
23233 /* First insn will adjust the stack pointer. */
23234 if (GET_CODE (e) != SET
23235 || GET_CODE (XEXP (e, 0)) != REG
23236 || REGNO (XEXP (e, 0)) != SP_REGNUM
23237 || GET_CODE (XEXP (e, 1)) != PLUS)
23240 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23241 nregs = XVECLEN (p, 0) - 1;
23243 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23246 /* The function prologue may also push pc, but not annotate it as it is
23247 never restored. We turn this into a stack pointer adjustment. */
23248 if (nregs * 4 == offset - 4)
23250 fprintf (asm_out_file, "\t.pad #4\n");
23254 fprintf (asm_out_file, "\t.save {");
23256 else if (IS_VFP_REGNUM (reg))
23259 fprintf (asm_out_file, "\t.vsave {");
23261 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23263 /* FPA registers are done differently. */
23264 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23268 /* Unknown register type. */
23271 /* If the stack increment doesn't match the size of the saved registers,
23272 something has gone horribly wrong. */
23273 if (offset != nregs * reg_size)
23278 /* The remaining insns will describe the stores. */
23279 for (i = 1; i <= nregs; i++)
23281 /* Expect (set (mem <addr>) (reg)).
23282 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23283 e = XVECEXP (p, 0, i);
23284 if (GET_CODE (e) != SET
23285 || GET_CODE (XEXP (e, 0)) != MEM
23286 || GET_CODE (XEXP (e, 1)) != REG)
23289 reg = REGNO (XEXP (e, 1));
23294 fprintf (asm_out_file, ", ");
23295 /* We can't use %r for vfp because we need to use the
23296 double precision register names. */
23297 if (IS_VFP_REGNUM (reg))
23298 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
23300 asm_fprintf (asm_out_file, "%r", reg);
23302 #ifdef ENABLE_CHECKING
23303 /* Check that the addresses are consecutive. */
23304 e = XEXP (XEXP (e, 0), 0);
23305 if (GET_CODE (e) == PLUS)
23307 offset += reg_size;
23308 if (GET_CODE (XEXP (e, 0)) != REG
23309 || REGNO (XEXP (e, 0)) != SP_REGNUM
23310 || GET_CODE (XEXP (e, 1)) != CONST_INT
23311 || offset != INTVAL (XEXP (e, 1)))
23315 || GET_CODE (e) != REG
23316 || REGNO (e) != SP_REGNUM)
23320 fprintf (asm_out_file, "}\n");
23323 /* Emit unwind directives for a SET. */
23326 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
23334 switch (GET_CODE (e0))
23337 /* Pushing a single register. */
23338 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
23339 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
23340 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
23343 asm_fprintf (asm_out_file, "\t.save ");
23344 if (IS_VFP_REGNUM (REGNO (e1)))
23345 asm_fprintf(asm_out_file, "{d%d}\n",
23346 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
23348 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
23352 if (REGNO (e0) == SP_REGNUM)
23354 /* A stack increment. */
23355 if (GET_CODE (e1) != PLUS
23356 || GET_CODE (XEXP (e1, 0)) != REG
23357 || REGNO (XEXP (e1, 0)) != SP_REGNUM
23358 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23361 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
23362 -INTVAL (XEXP (e1, 1)));
23364 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
23366 HOST_WIDE_INT offset;
23368 if (GET_CODE (e1) == PLUS)
23370 if (GET_CODE (XEXP (e1, 0)) != REG
23371 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
23373 reg = REGNO (XEXP (e1, 0));
23374 offset = INTVAL (XEXP (e1, 1));
23375 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
23376 HARD_FRAME_POINTER_REGNUM, reg,
23379 else if (GET_CODE (e1) == REG)
23382 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
23383 HARD_FRAME_POINTER_REGNUM, reg);
23388 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
23390 /* Move from sp to reg. */
23391 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
23393 else if (GET_CODE (e1) == PLUS
23394 && GET_CODE (XEXP (e1, 0)) == REG
23395 && REGNO (XEXP (e1, 0)) == SP_REGNUM
23396 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
23398 /* Set reg to offset from sp. */
23399 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
23400 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
23412 /* Emit unwind directives for the given insn. */
23415 arm_unwind_emit (FILE * asm_out_file, rtx insn)
23418 bool handled_one = false;
23420 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23423 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23424 && (TREE_NOTHROW (current_function_decl)
23425 || crtl->all_throwers_are_sibcalls))
23428 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
23431 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
23433 pat = XEXP (note, 0);
23434 switch (REG_NOTE_KIND (note))
23436 case REG_FRAME_RELATED_EXPR:
23439 case REG_CFA_REGISTER:
23442 pat = PATTERN (insn);
23443 if (GET_CODE (pat) == PARALLEL)
23444 pat = XVECEXP (pat, 0, 0);
23447 /* Only emitted for IS_STACKALIGN re-alignment. */
23452 src = SET_SRC (pat);
23453 dest = SET_DEST (pat);
23455 gcc_assert (src == stack_pointer_rtx);
23456 reg = REGNO (dest);
23457 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
23460 handled_one = true;
23463 case REG_CFA_DEF_CFA:
23464 case REG_CFA_EXPRESSION:
23465 case REG_CFA_ADJUST_CFA:
23466 case REG_CFA_OFFSET:
23467 /* ??? Only handling here what we actually emit. */
23468 gcc_unreachable ();
23476 pat = PATTERN (insn);
23479 switch (GET_CODE (pat))
23482 arm_unwind_emit_set (asm_out_file, pat);
23486 /* Store multiple. */
23487 arm_unwind_emit_sequence (asm_out_file, pat);
23496 /* Output a reference from a function exception table to the type_info
23497 object X. The EABI specifies that the symbol should be relocated by
23498 an R_ARM_TARGET2 relocation. */
23501 arm_output_ttype (rtx x)
23503 fputs ("\t.word\t", asm_out_file);
23504 output_addr_const (asm_out_file, x);
23505 /* Use special relocations for symbol references. */
23506 if (GET_CODE (x) != CONST_INT)
23507 fputs ("(TARGET2)", asm_out_file);
23508 fputc ('\n', asm_out_file);
23513 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
23516 arm_asm_emit_except_personality (rtx personality)
23518 fputs ("\t.personality\t", asm_out_file);
23519 output_addr_const (asm_out_file, personality);
23520 fputc ('\n', asm_out_file);
23523 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
23526 arm_asm_init_sections (void)
23528 exception_section = get_unnamed_section (0, output_section_asm_op,
23531 #endif /* ARM_UNWIND_INFO */
23533 /* Output unwind directives for the start/end of a function. */
23536 arm_output_fn_unwind (FILE * f, bool prologue)
23538 if (arm_except_unwind_info (&global_options) != UI_TARGET)
23542 fputs ("\t.fnstart\n", f);
23545 /* If this function will never be unwound, then mark it as such.
23546 The came condition is used in arm_unwind_emit to suppress
23547 the frame annotations. */
23548 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
23549 && (TREE_NOTHROW (current_function_decl)
23550 || crtl->all_throwers_are_sibcalls))
23551 fputs("\t.cantunwind\n", f);
23553 fputs ("\t.fnend\n", f);
23558 arm_emit_tls_decoration (FILE *fp, rtx x)
23560 enum tls_reloc reloc;
23563 val = XVECEXP (x, 0, 0);
23564 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
23566 output_addr_const (fp, val);
23571 fputs ("(tlsgd)", fp);
23574 fputs ("(tlsldm)", fp);
23577 fputs ("(tlsldo)", fp);
23580 fputs ("(gottpoff)", fp);
23583 fputs ("(tpoff)", fp);
23586 fputs ("(tlsdesc)", fp);
23589 gcc_unreachable ();
23598 fputs (" + (. - ", fp);
23599 output_addr_const (fp, XVECEXP (x, 0, 2));
23600 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
23601 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
23602 output_addr_const (fp, XVECEXP (x, 0, 3));
23612 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
23615 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
23617 gcc_assert (size == 4);
23618 fputs ("\t.word\t", file);
23619 output_addr_const (file, x);
23620 fputs ("(tlsldo)", file);
23623 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
23626 arm_output_addr_const_extra (FILE *fp, rtx x)
23628 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
23629 return arm_emit_tls_decoration (fp, x);
23630 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
23633 int labelno = INTVAL (XVECEXP (x, 0, 0));
23635 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
23636 assemble_name_raw (fp, label);
23640 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
23642 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
23646 output_addr_const (fp, XVECEXP (x, 0, 0));
23650 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
23652 output_addr_const (fp, XVECEXP (x, 0, 0));
23656 output_addr_const (fp, XVECEXP (x, 0, 1));
23660 else if (GET_CODE (x) == CONST_VECTOR)
23661 return arm_emit_vector_const (fp, x);
23666 /* Output assembly for a shift instruction.
23667 SET_FLAGS determines how the instruction modifies the condition codes.
23668 0 - Do not set condition codes.
23669 1 - Set condition codes.
23670 2 - Use smallest instruction. */
23672 arm_output_shift(rtx * operands, int set_flags)
23675 static const char flag_chars[3] = {'?', '.', '!'};
23680 c = flag_chars[set_flags];
23681 if (TARGET_UNIFIED_ASM)
23683 shift = shift_op(operands[3], &val);
23687 operands[2] = GEN_INT(val);
23688 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
23691 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
23694 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
23695 output_asm_insn (pattern, operands);
23699 /* Output a Thumb-1 casesi dispatch sequence. */
23701 thumb1_output_casesi (rtx *operands)
23703 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
23705 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23707 switch (GET_MODE(diff_vec))
23710 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23711 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23713 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
23714 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23716 return "bl\t%___gnu_thumb1_case_si";
23718 gcc_unreachable ();
23722 /* Output a Thumb-2 casesi instruction. */
23724 thumb2_output_casesi (rtx *operands)
23726 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
23728 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
23730 output_asm_insn ("cmp\t%0, %1", operands);
23731 output_asm_insn ("bhi\t%l3", operands);
23732 switch (GET_MODE(diff_vec))
23735 return "tbb\t[%|pc, %0]";
23737 return "tbh\t[%|pc, %0, lsl #1]";
23741 output_asm_insn ("adr\t%4, %l2", operands);
23742 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
23743 output_asm_insn ("add\t%4, %4, %5", operands);
23748 output_asm_insn ("adr\t%4, %l2", operands);
23749 return "ldr\t%|pc, [%4, %0, lsl #2]";
23752 gcc_unreachable ();
23756 /* Most ARM cores are single issue, but some newer ones can dual issue.
23757 The scheduler descriptions rely on this being correct. */
23759 arm_issue_rate (void)
23778 /* A table and a function to perform ARM-specific name mangling for
23779 NEON vector types in order to conform to the AAPCS (see "Procedure
23780 Call Standard for the ARM Architecture", Appendix A). To qualify
23781 for emission with the mangled names defined in that document, a
23782 vector type must not only be of the correct mode but also be
23783 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23786 enum machine_mode mode;
23787 const char *element_type_name;
23788 const char *aapcs_name;
23789 } arm_mangle_map_entry;
23791 static arm_mangle_map_entry arm_mangle_map[] = {
23792 /* 64-bit containerized types. */
23793 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23794 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23795 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23796 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23797 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23798 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23799 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23800 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23801 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23802 /* 128-bit containerized types. */
23803 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23804 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23805 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23806 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23807 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23808 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23809 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23810 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23811 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23812 { VOIDmode, NULL, NULL }
23816 arm_mangle_type (const_tree type)
23818 arm_mangle_map_entry *pos = arm_mangle_map;
23820 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23821 has to be managled as if it is in the "std" namespace. */
23822 if (TARGET_AAPCS_BASED
23823 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23825 static bool warned;
23826 if (!warned && warn_psabi && !in_system_header)
23829 inform (input_location,
23830 "the mangling of %<va_list%> has changed in GCC 4.4");
23832 return "St9__va_list";
23835 /* Half-precision float. */
23836 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23839 if (TREE_CODE (type) != VECTOR_TYPE)
23842 /* Check the mode of the vector type, and the name of the vector
23843 element type, against the table. */
23844 while (pos->mode != VOIDmode)
23846 tree elt_type = TREE_TYPE (type);
23848 if (pos->mode == TYPE_MODE (type)
23849 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23850 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23851 pos->element_type_name))
23852 return pos->aapcs_name;
23857 /* Use the default mangling for unrecognized (possibly user-defined)
23862 /* Order of allocation of core registers for Thumb: this allocation is
23863 written over the corresponding initial entries of the array
23864 initialized with REG_ALLOC_ORDER. We allocate all low registers
23865 first. Saving and restoring a low register is usually cheaper than
23866 using a call-clobbered high register. */
23868 static const int thumb_core_reg_alloc_order[] =
23870 3, 2, 1, 0, 4, 5, 6, 7,
23871 14, 12, 8, 9, 10, 11, 13, 15
23874 /* Adjust register allocation order when compiling for Thumb. */
23877 arm_order_regs_for_local_alloc (void)
23879 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23880 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23882 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23883 sizeof (thumb_core_reg_alloc_order));
23886 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23889 arm_frame_pointer_required (void)
23891 return (cfun->has_nonlocal_label
23892 || SUBTARGET_FRAME_POINTER_REQUIRED
23893 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23896 /* Only thumb1 can't support conditional execution, so return true if
23897 the target is not thumb1. */
23899 arm_have_conditional_execution (void)
23901 return !TARGET_THUMB1;
23904 /* Legitimize a memory reference for sync primitive implemented using
23905 ldrex / strex. We currently force the form of the reference to be
23906 indirect without offset. We do not yet support the indirect offset
23907 addressing supported by some ARM targets for these
23910 arm_legitimize_sync_memory (rtx memory)
23912 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23913 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23915 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23916 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23917 return legitimate_memory;
23920 /* An instruction emitter. */
23921 typedef void (* emit_f) (int label, const char *, rtx *);
23923 /* An instruction emitter that emits via the conventional
23924 output_asm_insn. */
23926 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23928 output_asm_insn (pattern, operands);
23931 /* Count the number of emitted synchronization instructions. */
23932 static unsigned arm_insn_count;
23934 /* An emitter that counts emitted instructions but does not actually
23935 emit instruction into the instruction stream. */
23937 arm_count (int label,
23938 const char *pattern ATTRIBUTE_UNUSED,
23939 rtx *operands ATTRIBUTE_UNUSED)
23945 /* Construct a pattern using conventional output formatting and feed
23946 it to output_asm_insn. Provides a mechanism to construct the
23947 output pattern on the fly. Note the hard limit on the pattern
23949 static void ATTRIBUTE_PRINTF_4
23950 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23951 const char *pattern, ...)
23956 va_start (ap, pattern);
23957 vsprintf (buffer, pattern, ap);
23959 emit (label, buffer, operands);
23962 /* Emit the memory barrier instruction, if any, provided by this
23963 target to a specified emitter. */
23965 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23967 if (TARGET_HAVE_DMB)
23969 /* Note we issue a system level barrier. We should consider
23970 issuing a inner shareabilty zone barrier here instead, ie.
23972 emit (0, "dmb\tsy", operands);
23976 if (TARGET_HAVE_DMB_MCR)
23978 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23982 gcc_unreachable ();
23985 /* Emit the memory barrier instruction, if any, provided by this
23988 arm_output_memory_barrier (rtx *operands)
23990 arm_process_output_memory_barrier (arm_emit, operands);
23994 /* Helper to figure out the instruction suffix required on ldrex/strex
23995 for operations on an object of the specified mode. */
23996 static const char *
23997 arm_ldrex_suffix (enum machine_mode mode)
24001 case QImode: return "b";
24002 case HImode: return "h";
24003 case SImode: return "";
24004 case DImode: return "d";
24006 gcc_unreachable ();
24011 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
24014 arm_output_ldrex (emit_f emit,
24015 enum machine_mode mode,
24019 const char *suffix = arm_ldrex_suffix (mode);
24022 operands[0] = target;
24023 operands[1] = memory;
24024 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
24027 /* Emit a strex{b,h,d, } instruction appropriate for the specified
24030 arm_output_strex (emit_f emit,
24031 enum machine_mode mode,
24037 const char *suffix = arm_ldrex_suffix (mode);
24040 operands[0] = result;
24041 operands[1] = value;
24042 operands[2] = memory;
24043 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
24047 /* Helper to emit a two operand instruction. */
24049 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
24055 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
24058 /* Helper to emit a three operand instruction. */
24060 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
24067 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
24070 /* Emit a load store exclusive synchronization loop.
24074 if old_value != required_value
24076 t1 = sync_op (old_value, new_value)
24077 [mem] = t1, t2 = [0|1]
24081 t1 == t2 is not permitted
24082 t1 == old_value is permitted
24086 RTX register or const_int representing the required old_value for
24087 the modify to continue, if NULL no comparsion is performed. */
24089 arm_output_sync_loop (emit_f emit,
24090 enum machine_mode mode,
24093 rtx required_value,
24097 enum attr_sync_op sync_op,
24098 int early_barrier_required)
24102 gcc_assert (t1 != t2);
24104 if (early_barrier_required)
24105 arm_process_output_memory_barrier (emit, NULL);
24107 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
24109 arm_output_ldrex (emit, mode, old_value, memory);
24111 if (required_value)
24115 operands[0] = old_value;
24116 operands[1] = required_value;
24117 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
24118 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
24124 arm_output_op3 (emit, "add", t1, old_value, new_value);
24128 arm_output_op3 (emit, "sub", t1, old_value, new_value);
24132 arm_output_op3 (emit, "orr", t1, old_value, new_value);
24136 arm_output_op3 (emit, "eor", t1, old_value, new_value);
24140 arm_output_op3 (emit,"and", t1, old_value, new_value);
24144 arm_output_op3 (emit, "and", t1, old_value, new_value);
24145 arm_output_op2 (emit, "mvn", t1, t1);
24155 arm_output_strex (emit, mode, "", t2, t1, memory);
24157 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24158 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24159 LOCAL_LABEL_PREFIX);
24163 /* Use old_value for the return value because for some operations
24164 the old_value can easily be restored. This saves one register. */
24165 arm_output_strex (emit, mode, "", old_value, t1, memory);
24166 operands[0] = old_value;
24167 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
24168 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
24169 LOCAL_LABEL_PREFIX);
24174 arm_output_op3 (emit, "sub", old_value, t1, new_value);
24178 arm_output_op3 (emit, "add", old_value, t1, new_value);
24182 arm_output_op3 (emit, "eor", old_value, t1, new_value);
24186 arm_output_op2 (emit, "mov", old_value, required_value);
24190 gcc_unreachable ();
24194 arm_process_output_memory_barrier (emit, NULL);
24195 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
24199 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
24202 default_value = operands[index - 1];
24204 return default_value;
24207 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
24208 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
24210 /* Extract the operands for a synchroniztion instruction from the
24211 instructions attributes and emit the instruction. */
24213 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
24215 rtx result, memory, required_value, new_value, t1, t2;
24217 enum machine_mode mode;
24218 enum attr_sync_op sync_op;
24220 result = FETCH_SYNC_OPERAND(result, 0);
24221 memory = FETCH_SYNC_OPERAND(memory, 0);
24222 required_value = FETCH_SYNC_OPERAND(required_value, 0);
24223 new_value = FETCH_SYNC_OPERAND(new_value, 0);
24224 t1 = FETCH_SYNC_OPERAND(t1, 0);
24225 t2 = FETCH_SYNC_OPERAND(t2, 0);
24227 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
24228 sync_op = get_attr_sync_op (insn);
24229 mode = GET_MODE (memory);
24231 arm_output_sync_loop (emit, mode, result, memory, required_value,
24232 new_value, t1, t2, sync_op, early_barrier);
24235 /* Emit a synchronization instruction loop. */
24237 arm_output_sync_insn (rtx insn, rtx *operands)
24239 arm_process_output_sync_insn (arm_emit, insn, operands);
24243 /* Count the number of machine instruction that will be emitted for a
24244 synchronization instruction. Note that the emitter used does not
24245 emit instructions, it just counts instructions being carefull not
24246 to count labels. */
24248 arm_sync_loop_insns (rtx insn, rtx *operands)
24250 arm_insn_count = 0;
24251 arm_process_output_sync_insn (arm_count, insn, operands);
24252 return arm_insn_count;
24255 /* Helper to call a target sync instruction generator, dealing with
24256 the variation in operands required by the different generators. */
24258 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
24259 rtx memory, rtx required_value, rtx new_value)
24261 switch (generator->op)
24263 case arm_sync_generator_omn:
24264 gcc_assert (! required_value);
24265 return generator->u.omn (old_value, memory, new_value);
24267 case arm_sync_generator_omrn:
24268 gcc_assert (required_value);
24269 return generator->u.omrn (old_value, memory, required_value, new_value);
24275 /* Expand a synchronization loop. The synchronization loop is expanded
24276 as an opaque block of instructions in order to ensure that we do
24277 not subsequently get extraneous memory accesses inserted within the
24278 critical region. The exclusive access property of ldrex/strex is
24279 only guaranteed in there are no intervening memory accesses. */
24281 arm_expand_sync (enum machine_mode mode,
24282 struct arm_sync_generator *generator,
24283 rtx target, rtx memory, rtx required_value, rtx new_value)
24285 if (target == NULL)
24286 target = gen_reg_rtx (mode);
24288 memory = arm_legitimize_sync_memory (memory);
24289 if (mode != SImode)
24291 rtx load_temp = gen_reg_rtx (SImode);
24293 if (required_value)
24294 required_value = convert_modes (SImode, mode, required_value, true);
24296 new_value = convert_modes (SImode, mode, new_value, true);
24297 emit_insn (arm_call_generator (generator, load_temp, memory,
24298 required_value, new_value));
24299 emit_move_insn (target, gen_lowpart (mode, load_temp));
24303 emit_insn (arm_call_generator (generator, target, memory, required_value,
24308 static unsigned int
24309 arm_autovectorize_vector_sizes (void)
24311 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
24315 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24317 /* Vectors which aren't in packed structures will not be less aligned than
24318 the natural alignment of their element type, so this is safe. */
24319 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24322 return default_builtin_vector_alignment_reachable (type, is_packed);
24326 arm_builtin_support_vector_misalignment (enum machine_mode mode,
24327 const_tree type, int misalignment,
24330 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24332 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24337 /* If the misalignment is unknown, we should be able to handle the access
24338 so long as it is not to a member of a packed data structure. */
24339 if (misalignment == -1)
24342 /* Return true if the misalignment is a multiple of the natural alignment
24343 of the vector's element type. This is probably always going to be
24344 true in practice, since we've already established that this isn't a
24346 return ((misalignment % align) == 0);
24349 return default_builtin_support_vector_misalignment (mode, type, misalignment,
24354 arm_conditional_register_usage (void)
24358 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24360 for (regno = FIRST_FPA_REGNUM;
24361 regno <= LAST_FPA_REGNUM; ++regno)
24362 fixed_regs[regno] = call_used_regs[regno] = 1;
24365 if (TARGET_THUMB1 && optimize_size)
24367 /* When optimizing for size on Thumb-1, it's better not
24368 to use the HI regs, because of the overhead of
24370 for (regno = FIRST_HI_REGNUM;
24371 regno <= LAST_HI_REGNUM; ++regno)
24372 fixed_regs[regno] = call_used_regs[regno] = 1;
24375 /* The link register can be clobbered by any branch insn,
24376 but we have no way to track that at present, so mark
24377 it as unavailable. */
24379 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24381 if (TARGET_32BIT && TARGET_HARD_FLOAT)
24383 if (TARGET_MAVERICK)
24385 for (regno = FIRST_FPA_REGNUM;
24386 regno <= LAST_FPA_REGNUM; ++ regno)
24387 fixed_regs[regno] = call_used_regs[regno] = 1;
24388 for (regno = FIRST_CIRRUS_FP_REGNUM;
24389 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
24391 fixed_regs[regno] = 0;
24392 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
24397 /* VFPv3 registers are disabled when earlier VFP
24398 versions are selected due to the definition of
24399 LAST_VFP_REGNUM. */
24400 for (regno = FIRST_VFP_REGNUM;
24401 regno <= LAST_VFP_REGNUM; ++ regno)
24403 fixed_regs[regno] = 0;
24404 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
24405 || regno >= FIRST_VFP_REGNUM + 32;
24410 if (TARGET_REALLY_IWMMXT)
24412 regno = FIRST_IWMMXT_GR_REGNUM;
24413 /* The 2002/10/09 revision of the XScale ABI has wCG0
24414 and wCG1 as call-preserved registers. The 2002/11/21
24415 revision changed this so that all wCG registers are
24416 scratch registers. */
24417 for (regno = FIRST_IWMMXT_GR_REGNUM;
24418 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
24419 fixed_regs[regno] = 0;
24420 /* The XScale ABI has wR0 - wR9 as scratch registers,
24421 the rest as call-preserved registers. */
24422 for (regno = FIRST_IWMMXT_REGNUM;
24423 regno <= LAST_IWMMXT_REGNUM; ++ regno)
24425 fixed_regs[regno] = 0;
24426 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
24430 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
24432 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24433 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24435 else if (TARGET_APCS_STACK)
24437 fixed_regs[10] = 1;
24438 call_used_regs[10] = 1;
24440 /* -mcaller-super-interworking reserves r11 for calls to
24441 _interwork_r11_call_via_rN(). Making the register global
24442 is an easy way of ensuring that it remains valid for all
24444 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
24445 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
24447 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24448 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24449 if (TARGET_CALLER_INTERWORKING)
24450 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24452 SUBTARGET_CONDITIONAL_REGISTER_USAGE
24456 arm_preferred_rename_class (reg_class_t rclass)
24458 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24459 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
24460 and code size can be reduced. */
24461 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
24467 /* Compute the atrribute "length" of insn "*push_multi".
24468 So this function MUST be kept in sync with that insn pattern. */
24470 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
24472 int i, regno, hi_reg;
24473 int num_saves = XVECLEN (parallel_op, 0);
24483 regno = REGNO (first_op);
24484 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24485 for (i = 1; i < num_saves && !hi_reg; i++)
24487 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
24488 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24496 /* Compute the number of instructions emitted by output_move_double. */
24498 arm_count_output_move_double_insns (rtx *operands)
24501 output_move_double (operands, false, &count);
24505 #include "gt-arm.h"